From 80dc689d661a416cffb36ee81eb2dc8f5aa93379 Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Fri, 13 Jan 2023 17:32:49 +0100 Subject: [PATCH] Allowed usage of `multi_model_statistics` on single cubes/products (#1849) --- esmvalcore/preprocessor/_multimodel.py | 15 ++- .../_multimodel/test_multimodel.py | 110 +++++++++++++++--- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 6fb5047fde..fc25490119 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -19,7 +19,7 @@ import numpy as np from iris.cube import Cube, CubeList from iris.exceptions import MergeError -from iris.util import equalise_attributes +from iris.util import equalise_attributes, new_axis from esmvalcore.iris_helpers import date2num from esmvalcore.preprocessor import remove_fx_variables @@ -302,6 +302,12 @@ def _combine(cubes): cubes = CubeList(cubes) + # For a single cube, merging returns a scalar CONCAT_DIM, which leads to a + # "Cannot collapse a dimension which does not describe any data" error when + # collapsing. Thus, treat single cubes differently here. + if len(cubes) == 1: + return new_axis(cubes[0], scalar_coord=CONCAT_DIM) + try: merged_cube = cubes.merge_cube() except MergeError as exc: @@ -411,9 +417,10 @@ def _multicube_statistics(cubes, statistics, span): Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. """ - if len(cubes) == 1: - raise ValueError('Cannot perform multicube statistics ' - 'for a single cube.') + if not cubes: + raise ValueError( + "Cannot perform multicube statistics for an empty list of cubes" + ) # Avoid modifying inputs copied_cubes = [cube.copy() for cube in cubes] diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 0625813916..29c2a60f9a 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -669,19 +669,6 @@ def test_edge_case_sub_daily_data_fail(span): _ = multi_model_statistics(cubes, span, statistics) -@pytest.mark.parametrize('span', SPAN_OPTIONS) -def test_edge_case_single_cube_fail(span): - """Test that an error is raised when a single cube is passed.""" - cube = generate_cube_from_dates('monthly') - cubes = (cube, ) - - statistic = 'min' - statistics = (statistic, ) - - with pytest.raises(ValueError): - _ = multi_model_statistics(cubes, span, statistics) - - def test_unify_time_coordinates(): """Test set common calendar.""" cube1 = generate_cube_from_dates('monthly', @@ -1045,3 +1032,100 @@ def test_arbitrary_dims_0d(cubes_with_arbitrary_dimensions): stat_cube = stat_cubes['sum'] assert stat_cube.shape == () assert_array_allclose(stat_cube.data, np.ma.array(0.0)) + + +def test_empty_input_multi_model_statistics(): + """Check that ``multi_model_statistics`` fails with empty input.""" + msg = "Cannot perform multicube statistics for an empty list of cubes" + with pytest.raises(ValueError, match=msg): + mm.multi_model_statistics([], span='full', statistics=['mean']) + + +def test_empty_input_ensemble_statistics(): + """Check that ``ensemble_statistics`` fails with empty input.""" + msg = "Cannot perform multicube statistics for an empty list of cubes" + with pytest.raises(ValueError, match=msg): + mm.ensemble_statistics( + [], span='full', statistics=['mean'], output_products=[] + ) + + +STATS = ['mean', 'median', 'min', 'max', 'p42.314', 'std_dev'] + + +@pytest.mark.parametrize('stat', STATS) +@pytest.mark.parametrize( + 'products', + [ + CubeList([generate_cube_from_dates('monthly')]), + set([PreprocessorFile(generate_cube_from_dates('monthly'))]), + ], +) +def test_single_input_multi_model_statistics(products, stat): + """Check that ``multi_model_statistics`` works with a single cube.""" + output = PreprocessorFile() + output_products = {'': {stat: output}} + kwargs = { + 'statistics': [stat], + 'span': 'full', + 'output_products': output_products, + 'keep_input_datasets': False, + } + + results = mm.multi_model_statistics(products, **kwargs) + + assert len(results) == 1 + + if isinstance(results, dict): # for cube as input + cube = results[stat] + else: # for PreprocessorFile as input + result = next(iter(results)) + assert len(result.cubes) == 1 + cube = result.cubes[0] + + if stat == 'std_dev': + assert_array_allclose( + cube.data, np.ma.masked_invalid([np.nan, np.nan, np.nan]) + ) + else: + assert_array_allclose(cube.data, np.ma.array([1.0, 1.0, 1.0])) + + +@pytest.mark.parametrize('stat', STATS) +@pytest.mark.parametrize( + 'products', + [ + CubeList([generate_cube_from_dates('monthly')]), + {PreprocessorFile(generate_cube_from_dates('monthly'))}, + ], +) +def test_single_input_ensemble_statistics(products, stat): + """Check that ``ensemble_statistics`` works with a single cube.""" + cube = generate_cube_from_dates('monthly') + attributes = { + 'project': 'project', + 'dataset': 'dataset', + 'exp': 'exp', + 'ensemble': '1', + } + products = {PreprocessorFile(cube, attributes=attributes)} + output = PreprocessorFile() + output_products = {'project_dataset_exp': {stat: output}} + kwargs = { + 'statistics': [stat], + 'output_products': output_products, + } + + results = mm.ensemble_statistics(products, **kwargs) + + assert len(results) == 1 + result = next(iter(results)) + assert len(result.cubes) == 1 + cube = result.cubes[0] + + if stat == 'std_dev': + assert_array_allclose( + cube.data, np.ma.masked_invalid([np.nan, np.nan, np.nan]) + ) + else: + assert_array_allclose(cube.data, np.ma.array([1.0, 1.0, 1.0]))