From 7db0e62afd3fa0779ffa294214017d2c6f0c98d6 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Mon, 11 Jul 2022 14:32:53 +0100 Subject: [PATCH 01/10] Re-write pearsonr to use Resolve --- docs/src/whatsnew/latest.rst | 2 + lib/iris/analysis/stats.py | 150 +++++++++--------- lib/iris/common/resolve.py | 2 + .../unit/analysis/stats/test_pearsonr.py | 37 ++--- 4 files changed, 99 insertions(+), 92 deletions(-) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index d7e9a40648d..7bc7bd3a53b 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -100,6 +100,8 @@ This document explains the changes made to Iris for this release lazy data from file. This will also speed up coordinate comparison. (:pull:`5610`) +#. `@rcomer`_ modified :func:`~iris.analysis.stats.pearsonr` so it preserves + lazy data in all cases and also runs a little faster. (:pull:`5638`) 🔥 Deprecations =============== diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py index 33eb1713369..12d460cb360 100644 --- a/lib/iris/analysis/stats.py +++ b/lib/iris/analysis/stats.py @@ -4,11 +4,12 @@ # See LICENSE in the root of the repository for full licensing details. """Statistical operations between cubes.""" +import dask.array as da import numpy as np -import numpy.ma as ma import iris -from iris.util import broadcast_to_shape +from iris.common import Resolve +from iris.util import _mask_array def pearsonr( @@ -63,13 +64,13 @@ def pearsonr( Notes ----- + If either of the input cubes has lazy data, the result will have lazy data. + Reference: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient - This operation is non-lazy. - """ - # Assign larger cube to cube_1 + # Assign larger cube to cube_1 for simplicity. if cube_b.ndim > cube_a.ndim: cube_1 = cube_b cube_2 = cube_a @@ -79,90 +80,91 @@ def pearsonr( smaller_shape = cube_2.shape - dim_coords_1 = [coord.name() for coord in cube_1.dim_coords] - dim_coords_2 = [coord.name() for coord in cube_2.dim_coords] - common_dim_coords = list(set(dim_coords_1) & set(dim_coords_2)) + # Get the broadcast, auto-transposed safe versions of the cube operands. + resolver = Resolve(cube_1, cube_2) + cube_1 = resolver.lhs_cube_resolved + cube_2 = resolver.rhs_cube_resolved + + if cube_1.has_lazy_data() or cube_2.has_lazy_data(): + al = da + array_1 = cube_1.lazy_data() + array_2 = cube_2.lazy_data() + else: + al = np + array_1 = cube_1.data + array_2 = cube_2.data + # If no coords passed then set to all common dimcoords of cubes. if corr_coords is None: - corr_coords = common_dim_coords - - def _ones_like(cube): - # Return a copy of cube with the same mask, but all data values set to 1. - # The operation is non-lazy. - # For safety we also discard any cell-measures and ancillary-variables, to - # avoid cube arithmetic possibly objecting to them, or inadvertently retaining - # them in the result where they might be inappropriate. - ones_cube = cube.copy() - ones_cube.data = np.ones_like(cube.data) - ones_cube.rename("unknown") - ones_cube.units = 1 - for cm in ones_cube.cell_measures(): - ones_cube.remove_cell_measure(cm) - for av in ones_cube.ancillary_variables(): - ones_cube.remove_ancillary_variable(av) - return ones_cube + dim_coords_1 = {coord.name() for coord in cube_1.dim_coords} + dim_coords_2 = {coord.name() for coord in cube_2.dim_coords} + corr_coords = list(dim_coords_1.intersection(dim_coords_2)) + + # Interpret coords as array dimensions. + corr_dims = set() + for coord in corr_coords: + corr_dims.update(cube_1.coord_dims(coord)) + + corr_dims = tuple(corr_dims) # Match up data masks if required. if common_mask: - # Create a cube of 1's with a common mask. - if ma.is_masked(cube_2.data): - mask_cube = _ones_like(cube_2) - else: - mask_cube = 1.0 - if ma.is_masked(cube_1.data): - # Take a slice to avoid unnecessary broadcasting of cube_2. - slice_coords = [ - dim_coords_1[i] - for i in range(cube_1.ndim) - if dim_coords_1[i] not in common_dim_coords - and np.array_equal( - cube_1.data.mask.any(axis=i), cube_1.data.mask.all(axis=i) - ) - ] - cube_1_slice = next(cube_1.slices_over(slice_coords)) - mask_cube = _ones_like(cube_1_slice) * mask_cube - # Apply common mask to data. - if isinstance(mask_cube, iris.cube.Cube): - cube_1 = cube_1 * mask_cube - cube_2 = mask_cube * cube_2 - dim_coords_2 = [coord.name() for coord in cube_2.dim_coords] - - # Broadcast weights to shape of cubes if necessary. - if weights is None or cube_1.shape == smaller_shape: - weights_1 = weights - weights_2 = weights + mask_1 = al.ma.getmaskarray(array_1) + if al is np: + # Reduce all invariant dimensions of mask_1 to length 1. This avoids + # unnecessary broadcasting of array_2. + index = tuple( + slice(0, 1) + if np.array_equal(mask_1.any(axis=dim), mask_1.all(axis=dim)) + else slice(None) + for dim in range(mask_1.ndim) + ) + mask_1 = mask_1[index] + + array_2 = _mask_array(array_2, mask_1) + array_1 = _mask_array(array_1, al.ma.getmaskarray(array_2)) + + # Broadcast weights to shape of arrays if necessary. + if weights is None: + weights_1 = weights_2 = None else: if weights.shape != smaller_shape: - raise ValueError( - "weights array should have dimensions {}".format(smaller_shape) - ) + msg = f"weights array should have dimensions {smaller_shape}" + raise ValueError(msg) + + if resolver.reorder_src_dims is not None: + # Apply same transposition as was done to cube_2 within Resolve. + weights = weights.transpose(resolver.reorder_src_dims) + + # Reshape to add in any length-1 dimensions needed for broadcasting. + weights = weights.reshape(cube_2.shape) - dims_1_common = [ - i for i in range(cube_1.ndim) if dim_coords_1[i] in common_dim_coords - ] - weights_1 = broadcast_to_shape(weights, cube_1.shape, dims_1_common) - if cube_2.shape != smaller_shape: - dims_2_common = [ - i for i in range(cube_2.ndim) if dim_coords_2[i] in common_dim_coords - ] - weights_2 = broadcast_to_shape(weights, cube_2.shape, dims_2_common) - else: - weights_2 = weights + weights_2 = np.broadcast_to(weights, array_2.shape) + weights_1 = np.broadcast_to(weights, array_1.shape) # Calculate correlations. - s1 = cube_1 - cube_1.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_1) - s2 = cube_2 - cube_2.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_2) + s1 = array_1 - al.ma.average( + array_1, axis=corr_dims, weights=weights_1, keepdims=True + ) + s2 = array_2 - al.ma.average( + array_2, axis=corr_dims, weights=weights_2, keepdims=True + ) - covar = (s1 * s2).collapsed( + s_prod = resolver.cube(s1 * s2) + + # Use cube collapsed method as it takes care of coordinate collapsing and missing + # data tolerance. + covar = s_prod.collapsed( corr_coords, iris.analysis.SUM, weights=weights_1, mdtol=mdtol ) - var_1 = (s1**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_1) - var_2 = (s2**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_2) - denom = iris.analysis.maths.apply_ufunc( - np.sqrt, var_1 * var_2, new_unit=covar.units - ) + var_1 = iris.analysis._sum(s1**2, axis=corr_dims, weights=weights_1) + var_2 = iris.analysis._sum(s2**2, axis=corr_dims, weights=weights_2) + + denom = np.sqrt(var_1 * var_2) + corr_cube = covar / denom corr_cube.rename("Pearson's r") + corr_cube.units = 1 return corr_cube diff --git a/lib/iris/common/resolve.py b/lib/iris/common/resolve.py index b35397ee587..09555ef9727 100644 --- a/lib/iris/common/resolve.py +++ b/lib/iris/common/resolve.py @@ -304,6 +304,7 @@ def __init__(self, lhs=None, rhs=None): #: operands, where the direction of the mapping is governed by #: :attr:`~iris.common.resolve.Resolve.map_rhs_to_lhs`. self.mapping = None # set in _metadata_mapping + self.reorder_src_dims = None # set in _as_compatible_cubes #: Cache containing a list of dim, aux and scalar coordinates prepared #: and ready for creating and attaching to the resultant resolved @@ -439,6 +440,7 @@ def _as_compatible_cubes(self): # Determine whether a transpose of the src cube is necessary. if order != sorted(order): + self.reorder_src_dims = order new_src_data = new_src_data.transpose(order) logger.debug( f"transpose src {self._src_cube_position} cube with order {order}" diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index 50387e14182..9eb6e44bee3 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -12,12 +12,13 @@ import numpy.ma as ma import iris +import iris._lazy_data import iris.analysis.stats as stats from iris.exceptions import CoordinateNotFoundError @tests.skip_data -class Test(tests.IrisTest): +class TestLazy(tests.IrisTest): def setUp(self): # 3D cubes: cube_temp = iris.load_cube( @@ -126,10 +127,7 @@ def test_non_existent_coord(self): def test_mdtol(self): cube_small = self.cube_a[:, 0, 0] - cube_small_masked = cube_small.copy() - cube_small_masked.data = ma.array( - cube_small.data, mask=np.array([0, 0, 0, 1, 1, 1], dtype=bool) - ) + cube_small_masked = iris.util.mask_cube(cube_small, [0, 0, 0, 1, 1, 1]) r1 = stats.pearsonr(cube_small, cube_small_masked) r2 = stats.pearsonr(cube_small, cube_small_masked, mdtol=0.49) self.assertArrayAlmostEqual(r1.data, np.array([0.74586593])) @@ -137,25 +135,21 @@ def test_mdtol(self): def test_common_mask_simple(self): cube_small = self.cube_a[:, 0, 0] - cube_small_masked = cube_small.copy() - cube_small_masked.data = ma.array( - cube_small.data, mask=np.array([0, 0, 0, 1, 1, 1], dtype=bool) - ) + cube_small_masked = iris.util.mask_cube(cube_small, [0, 0, 0, 1, 1, 1]) r = stats.pearsonr(cube_small, cube_small_masked, common_mask=True) self.assertArrayAlmostEqual(r.data, np.array([1.0])) def test_common_mask_broadcast(self): - cube_small = self.cube_a[:, 0, 0] + cube_small = iris.util.mask_cube(self.cube_a[:, 0, 0], [0, 0, 0, 0, 0, 1]) + mask_2d = np.zeros((6, 2), dtype=bool) + # 2d mask varies on unshared coord: + mask_2d[0, 1] = 1 + cube_small_2d = self.cube_a[:, 0:2, 0] - cube_small.data = ma.array( - cube_small.data, mask=np.array([0, 0, 0, 0, 0, 1], dtype=bool) - ) - cube_small_2d.data = ma.array( - np.tile(cube_small.data[:, np.newaxis], 2), - mask=np.zeros((6, 2), dtype=bool), + cube_small_2d.data = iris.util._mask_array( + cube_small.core_data().reshape(6, 1), mask_2d ) - # 2d mask varies on unshared coord: - cube_small_2d.data.mask[0, 1] = 1 + r = stats.pearsonr( cube_small, cube_small_2d, @@ -169,5 +163,12 @@ def test_common_mask_broadcast(self): self.assertArrayAlmostEqual(r.data, np.array([1.0, 1.0])) +class TestReal(TestLazy): + def setUp(self): + super().setUp() + for cube in [self.cube_a, self.cube_b]: + cube.data + + if __name__ == "__main__": tests.main() From 844ec33ea4069dedcd23805486ba041401736b12 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 15 Dec 2023 10:44:16 +0000 Subject: [PATCH 02/10] add test for weight transposing case --- .../unit/analysis/stats/test_pearsonr.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index 9eb6e44bee3..e709b36e895 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -112,6 +112,26 @@ def test_broadcast_cubes_weighted(self): ] self.assertArrayAlmostEqual(r.data, np.array(r_by_slice)) + def test_broadcast_transpose_cubes_weighted(self): + # Reference is calculated with no transposition. + r_ref = stats.pearsonr( + self.cube_a, + self.cube_b[0, :, :], + ["latitude", "longitude"], + weights=self.weights[0, :, :], + ) + + self.cube_a.transpose() + r_test = stats.pearsonr( + self.cube_a, + self.cube_b[0, :, :], + ["latitude", "longitude"], + weights=self.weights[0, :, :], + ) + + # Should get the same result, but transposed. + self.assertArrayAlmostEqual(r_test.data, r_ref.data.T) + def test_weight_error(self): with self.assertRaises(ValueError): stats.pearsonr( From 0bca68a2a04ebba3f09eba45762b484e5a917525 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Wed, 20 Dec 2023 13:19:24 +0000 Subject: [PATCH 03/10] Fix single coordinate case --- lib/iris/analysis/stats.py | 2 ++ lib/iris/tests/unit/analysis/stats/test_pearsonr.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py index 12d460cb360..7db21bac7a3 100644 --- a/lib/iris/analysis/stats.py +++ b/lib/iris/analysis/stats.py @@ -102,6 +102,8 @@ def pearsonr( # Interpret coords as array dimensions. corr_dims = set() + if isinstance(corr_coords, str): + corr_coords = [corr_coords] for coord in corr_coords: corr_dims.update(cube_1.coord_dims(coord)) diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index e709b36e895..0d66635fc1b 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -60,6 +60,10 @@ def test_compatible_cubes(self): ], ) + def test_single_coord(self): + # Smoke test that single coord can be passed as single string. + stats.pearsonr(self.cube_a, self.cube_b, "latitude") + def test_broadcast_cubes(self): r1 = stats.pearsonr( self.cube_a, self.cube_b[0, :, :], ["latitude", "longitude"] From 3bf0f251006282d47b0cff34b8325069fffd1d30 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Wed, 20 Dec 2023 13:56:12 +0000 Subject: [PATCH 04/10] use lenient coord handling --- lib/iris/analysis/stats.py | 4 +++- lib/iris/tests/unit/analysis/stats/test_pearsonr.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py index 7db21bac7a3..96729b47f38 100644 --- a/lib/iris/analysis/stats.py +++ b/lib/iris/analysis/stats.py @@ -8,10 +8,12 @@ import numpy as np import iris -from iris.common import Resolve +from iris.common import SERVICES, Resolve +from iris.common.lenient import _lenient_client from iris.util import _mask_array +@_lenient_client(services=SERVICES) def pearsonr( cube_a, cube_b, diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index 0d66635fc1b..908eafe4771 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -193,6 +193,10 @@ def setUp(self): for cube in [self.cube_a, self.cube_b]: cube.data + def test_lenient_handling(self): + self.cube_a.coord("time").var_name = "wibble" + stats.pearsonr(self.cube_a, self.cube_b) + if __name__ == "__main__": tests.main() From 09e83f73af796f4fe43c341355db0bfe1dfbfbd2 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Wed, 20 Dec 2023 14:09:02 +0000 Subject: [PATCH 05/10] Move coordinate handling tests to separate class These do not need real and lazy versions --- .../unit/analysis/stats/test_pearsonr.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index 908eafe4771..6a0ffadebc4 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -17,8 +17,7 @@ from iris.exceptions import CoordinateNotFoundError -@tests.skip_data -class TestLazy(tests.IrisTest): +class Mixin: def setUp(self): # 3D cubes: cube_temp = iris.load_cube( @@ -34,6 +33,9 @@ def setUp(self): cube_temp.coord("longitude").guess_bounds() self.weights = iris.analysis.cartography.area_weights(cube_temp) + +@tests.skip_data +class TestLazy(Mixin, tests.IrisTest): def test_perfect_corr(self): r = stats.pearsonr(self.cube_a, self.cube_a, ["latitude", "longitude"]) self.assertArrayEqual(r.data, np.array([1.0] * 6)) @@ -42,10 +44,6 @@ def test_perfect_corr_all_dims(self): r = stats.pearsonr(self.cube_a, self.cube_a) self.assertArrayEqual(r.data, np.array([1.0])) - def test_incompatible_cubes(self): - with self.assertRaises(ValueError): - stats.pearsonr(self.cube_a[:, 0, :], self.cube_b[0, :, :], "longitude") - def test_compatible_cubes(self): r = stats.pearsonr(self.cube_a, self.cube_b, ["latitude", "longitude"]) self.assertArrayAlmostEqual( @@ -60,10 +58,6 @@ def test_compatible_cubes(self): ], ) - def test_single_coord(self): - # Smoke test that single coord can be passed as single string. - stats.pearsonr(self.cube_a, self.cube_b, "latitude") - def test_broadcast_cubes(self): r1 = stats.pearsonr( self.cube_a, self.cube_b[0, :, :], ["latitude", "longitude"] @@ -193,10 +187,21 @@ def setUp(self): for cube in [self.cube_a, self.cube_b]: cube.data + +class TestCoordHandling(Mixin, tests.IrisTest): def test_lenient_handling(self): + # Smoke test that mismatched var_name does not prevent operation. self.cube_a.coord("time").var_name = "wibble" stats.pearsonr(self.cube_a, self.cube_b) + def test_incompatible_cubes(self): + with self.assertRaises(ValueError): + stats.pearsonr(self.cube_a[:, 0, :], self.cube_b[0, :, :], "longitude") + + def test_single_coord(self): + # Smoke test that single coord can be passed as single string. + stats.pearsonr(self.cube_a, self.cube_b, "latitude") + if __name__ == "__main__": tests.main() From 23730352cdf023414352fec0f433927373ccd05a Mon Sep 17 00:00:00 2001 From: Ruth Comer <10599679+rcomer@users.noreply.github.com> Date: Fri, 22 Dec 2023 10:34:07 +0000 Subject: [PATCH 06/10] Make some syntax highlighters happier Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com> --- lib/iris/tests/unit/analysis/stats/test_pearsonr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index 6a0ffadebc4..12de98debce 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -185,7 +185,7 @@ class TestReal(TestLazy): def setUp(self): super().setUp() for cube in [self.cube_a, self.cube_b]: - cube.data + _ = cube.data class TestCoordHandling(Mixin, tests.IrisTest): From b3b9ddeeaffb7372481815599c6a1469b7694307 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 22 Dec 2023 16:03:36 +0000 Subject: [PATCH 07/10] Initial review actions --- .../tests/unit/analysis/stats/test_pearsonr.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py index 12de98debce..f824d6930f4 100644 --- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py +++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py @@ -139,10 +139,6 @@ def test_weight_error(self): weights=self.weights, ) - def test_non_existent_coord(self): - with self.assertRaises(CoordinateNotFoundError): - stats.pearsonr(self.cube_a, self.cube_b, "bad_coord") - def test_mdtol(self): cube_small = self.cube_a[:, 0, 0] cube_small_masked = iris.util.mask_cube(cube_small, [0, 0, 0, 1, 1, 1]) @@ -163,10 +159,13 @@ def test_common_mask_broadcast(self): # 2d mask varies on unshared coord: mask_2d[0, 1] = 1 + # Make a (6, 2) cube. cube_small_2d = self.cube_a[:, 0:2, 0] - cube_small_2d.data = iris.util._mask_array( - cube_small.core_data().reshape(6, 1), mask_2d + # Duplicate data along unshared coord's dimension. + new_data = iris.util.broadcast_to_shape( + cube_small.core_data(), (6, 2), dim_map=[0] ) + cube_small_2d.data = iris.util._mask_array(new_data, mask_2d) r = stats.pearsonr( cube_small, @@ -202,6 +201,10 @@ def test_single_coord(self): # Smoke test that single coord can be passed as single string. stats.pearsonr(self.cube_a, self.cube_b, "latitude") + def test_non_existent_coord(self): + with self.assertRaises(CoordinateNotFoundError): + stats.pearsonr(self.cube_a, self.cube_b, "bad_coord") + if __name__ == "__main__": tests.main() From 49a9d7b788ab0a4d5291b80831f1314eadd4c467 Mon Sep 17 00:00:00 2001 From: Ruth Comer <10599679+rcomer@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:23:29 +0000 Subject: [PATCH 08/10] Clearer comment Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com> --- lib/iris/analysis/stats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py index 96729b47f38..e165cc27113 100644 --- a/lib/iris/analysis/stats.py +++ b/lib/iris/analysis/stats.py @@ -140,7 +140,8 @@ def pearsonr( # Apply same transposition as was done to cube_2 within Resolve. weights = weights.transpose(resolver.reorder_src_dims) - # Reshape to add in any length-1 dimensions needed for broadcasting. + # Reshape to add in any length-1 dimensions that Resolve() has added + # for broadcasting. weights = weights.reshape(cube_2.shape) weights_2 = np.broadcast_to(weights, array_2.shape) From f1c970afd915412e3fbfdeaeb1a800f33fb89b7e Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 12 Jan 2024 12:42:12 +0000 Subject: [PATCH 09/10] assign reviewer blame --- docs/src/whatsnew/latest.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 7bc7bd3a53b..36bb5bba1d7 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -100,8 +100,9 @@ This document explains the changes made to Iris for this release lazy data from file. This will also speed up coordinate comparison. (:pull:`5610`) -#. `@rcomer`_ modified :func:`~iris.analysis.stats.pearsonr` so it preserves - lazy data in all cases and also runs a little faster. (:pull:`5638`) +#. `@rcomer`_ and `@trexfeathers`_ (reviewer) modified + :func:`~iris.analysis.stats.pearsonr` so it preserves lazy data in all cases + and also runs a little faster. (:pull:`5638`) 🔥 Deprecations =============== From 045ce020ca85803bf89d1930c4422dd963c27465 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 12 Jan 2024 17:00:56 +0000 Subject: [PATCH 10/10] first attempt at benchmarks --- benchmarks/benchmarks/stats.py | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 benchmarks/benchmarks/stats.py diff --git a/benchmarks/benchmarks/stats.py b/benchmarks/benchmarks/stats.py new file mode 100644 index 00000000000..0530431900a --- /dev/null +++ b/benchmarks/benchmarks/stats.py @@ -0,0 +1,38 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Stats benchmark tests.""" + +import iris +from iris.analysis.stats import pearsonr +import iris.tests + + +class PearsonR: + def setup(self): + cube_temp = iris.load_cube( + iris.tests.get_data_path( + ("NetCDF", "global", "xyt", "SMALL_total_column_co2.nc") + ) + ) + + # Make data non-lazy. + cube_temp.data + + self.cube_a = cube_temp[:6] + self.cube_b = cube_temp[20:26] + self.cube_b.replace_coord(self.cube_a.coord("time")) + for name in ["latitude", "longitude"]: + self.cube_b.coord(name).guess_bounds() + self.weights = iris.analysis.cartography.area_weights(self.cube_b) + + def time_real(self): + pearsonr(self.cube_a, self.cube_b, weights=self.weights) + + def time_lazy(self): + for cube in self.cube_a, self.cube_b: + cube.data = cube.lazy_data() + + result = pearsonr(self.cube_a, self.cube_b, weights=self.weights) + result.data