
Allow multi_model_statistics on cubes with arbitrary dimensions #1808

Merged (13 commits, Nov 29, 2022)
25 changes: 10 additions & 15 deletions doc/recipe/preprocessor.rst
@@ -1007,14 +1007,13 @@ of argument parameters passed to ``multi_model_statistics``. Percentiles can be
specified like ``p1.5`` or ``p95``. The decimal point will be replaced by a dash
in the output file.
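For example, a recipe entry requesting the multi-model mean and the 95th
percentile could look like this (the preprocessor name ``mm_stats`` is
illustrative, not from this PR):

.. code-block:: yaml

    preprocessors:
      mm_stats:
        multi_model_statistics:
          span: overlap
          statistics: [mean, p95]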

Restrictive computation is also available by excluding any set of models that
the user will not want to include in the statistics (by setting ``exclude:
[excluded models list]`` argument).

Input datasets may have different time coordinates.
Apart from that, all dimensions must match.
Statistics can be computed
across overlapping times only (``span: overlap``) or across the full time span
of the combined models (``span: full``). The preprocessor sets a common time
coordinate on all datasets. As the number of days in a year may vary between
@@ -1038,10 +1037,6 @@ parameter ``keep_input_datasets`` to ``false`` (default value is ``true``).
exclude: [NCEP]
keep_input_datasets: false

Multi-model statistics also supports a ``groupby`` argument. You can group by
any dataset key (``project``, ``experiment``, etc.) or a combination of keys in a list. You can
also add an arbitrary tag to a dataset definition and then group by that tag. When
@@ -1967,11 +1962,11 @@ See also :func:`esmvalcore.preprocessor.detrend`.
Rolling window statistics
=========================

One can calculate rolling window statistics using the
preprocessor function ``rolling_window_statistics``.
This function takes three parameters:

* ``coordinate``: coordinate over which the rolling-window statistics are
  calculated.

* ``operator``: operation to apply. Accepted values are 'mean', 'median',
@@ -1980,12 +1975,12 @@ This function takes three parameters:
* ``window_length``: size of the rolling window to use (number of points).

This example, applied to daily precipitation data, calculates a two-day
rolling precipitation sum.

.. code-block:: yaml

    preprocessors:
      preproc_rolling_window:
        rolling_window_statistics:
          coordinate: time
          operator: sum
          window_length: 2
109 changes: 79 additions & 30 deletions esmvalcore/preprocessor/_multimodel.py
@@ -17,6 +17,8 @@
import iris
import iris.coord_categorisation
import numpy as np
from iris.cube import Cube, CubeList
from iris.exceptions import MergeError
from iris.util import equalise_attributes

from esmvalcore.iris_helpers import date2num
@@ -139,8 +141,16 @@ def _unify_time_coordinates(cubes):
        # Update the cubes' time coordinate (both point values and the units!)
        cube.coord('time').points = date2num(dates, t_unit, coord.dtype)
        cube.coord('time').units = t_unit
        _guess_time_bounds(cube)


def _guess_time_bounds(cube):
    """Guess time bounds if possible."""
    cube.coord('time').bounds = None
    try:
        cube.coord('time').guess_bounds()
    except ValueError:  # Time has length 1 --> guessing bounds not possible
        return

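``guess_bounds`` fails for a length-1 coordinate because bound edges are
derived from the spacing between adjacent points. A minimal numpy sketch of
that midpoint logic (``guess_bounds`` here is a simplified stand-in, not
iris's actual implementation):

```python
import numpy as np

def guess_bounds(points):
    """Guess contiguous bounds for 1-D coordinate points (midpoint rule)."""
    points = np.asarray(points, dtype=float)
    if points.size < 2:
        # A single point carries no spacing information -> bounds ambiguous.
        raise ValueError("Need at least two points to guess bounds")
    mid = (points[:-1] + points[1:]) / 2.0           # interior edges
    first = points[0] - (mid[0] - points[0])         # extrapolate left edge
    last = points[-1] + (points[-1] - mid[-1])       # extrapolate right edge
    edges = np.concatenate([[first], mid, [last]])
    return np.column_stack([edges[:-1], edges[1:]])  # (n, 2) bounds array
```

This mirrors why the ``except ValueError`` branch above simply returns for
single-timestep cubes instead of inventing bounds.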

def _time_coords_are_aligned(cubes):
Expand Down Expand Up @@ -196,7 +206,7 @@ def _map_to_new_time(cube, time_points):
return new_cube


def _align_time_coord(cubes, span):
"""Expand or subset cubes so they share a common time span."""
_unify_time_coordinates(cubes)

@@ -217,8 +227,7 @@

    for cube in new_cubes:
        # Make sure bounds exist and are consistent
        _guess_time_bounds(cube)

return new_cubes
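Under the hood, ``span`` amounts to intersecting (``overlap``) or uniting
(``full``) the time points of all cubes. A rough sketch of that selection
with plain arrays (``common_time_points`` is a hypothetical helper, not
ESMValCore code):

```python
import numpy as np

def common_time_points(all_points, span):
    """Return shared time points: intersection ('overlap') or union ('full')."""
    if span == "overlap":
        common = set(all_points[0])
        for points in all_points[1:]:
            common &= set(points)   # keep only times present in every cube
    elif span == "full":
        common = set()
        for points in all_points:
            common |= set(points)   # keep every time seen in any cube
    else:
        raise ValueError(f"Unknown span: {span!r}")
    return np.array(sorted(common))
```

With ``span: full``, cubes missing some of the resulting points are padded
with missing data before the statistics are computed.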

@@ -288,27 +297,47 @@ def _combine(cubes):
        concat_dim = iris.coords.AuxCoord(i, var_name=CONCAT_DIM)
        cube.add_aux_coord(concat_dim)

    cubes = CubeList(cubes)

    try:
        merged_cube = cubes.merge_cube()
    except MergeError as exc:
        raise ValueError(
            "Multi-model statistics failed to merge input cubes into a single "
            "array"
        ) from exc

    return merged_cube


def _compute_slices(cubes):
"""Create cube slices resulting in a combined cube of about 1 GiB."""
"""Create cube slices along the first dimension of the cubes.

This results in a combined cube of about 1 GiB.

Note
----
For scalar cubes, simply return ``None``.

"""
# Scalar cubes
if cubes[0].shape == ():
yield None
return

# Non-scalar cubes
gibibyte = 2**30
total_bytes = cubes[0].data.nbytes * len(cubes)
n_slices = int(np.ceil(total_bytes / gibibyte))

n_timesteps = cubes[0].shape[0]
slice_len = int(np.ceil(n_timesteps / n_slices))
len_dim_0 = cubes[0].shape[0]
slice_len = int(np.ceil(len_dim_0 / n_slices))

for i in range(n_slices):
start = i * slice_len
end = (i + 1) * slice_len
if end >= n_timesteps:
yield slice(start, n_timesteps)
if end >= len_dim_0:
yield slice(start, len_dim_0)
return
yield slice(start, end)

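The chunking arithmetic above can be exercised without iris cubes;
``compute_slices`` below is a hypothetical stand-alone helper that takes a
shape, an item size, and a cube count in place of real cubes:

```python
import numpy as np

def compute_slices(shape, itemsize, n_cubes, target_bytes=2**30):
    """Yield slices along dim 0 so each combined chunk stays near target_bytes."""
    if shape == ():
        yield None  # scalar cubes: nothing to slice over
        return
    total_bytes = int(np.prod(shape)) * itemsize * n_cubes
    n_slices = int(np.ceil(total_bytes / target_bytes))
    len_dim_0 = shape[0]
    slice_len = int(np.ceil(len_dim_0 / n_slices))
    for i in range(n_slices):
        start = i * slice_len
        end = min((i + 1) * slice_len, len_dim_0)
        yield slice(start, end)
        if end == len_dim_0:
            return  # last chunk may be shorter than slice_len
```

For example, three float64 cubes of shape ``(12, 100)`` with a 9600-byte
target split into three four-timestep slices.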
@@ -320,7 +349,10 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,

    result_slices = []
    for chunk in _compute_slices(cubes):
        if chunk is None:
            single_model_slices = cubes  # scalar cubes
        else:
            single_model_slices = [cube[chunk] for cube in cubes]
        combined_slice = _combine(single_model_slices)
        with warnings.catch_warnings():
            warnings.filterwarnings(
@@ -342,14 +374,14 @@
        result_slices.append(collapsed_slice)

    try:
        result_cube = CubeList(result_slices).concatenate_cube()
    except Exception as excinfo:
        raise ValueError(
            f"Multi-model statistics failed to concatenate results into a "
            f"single array. This happened for operator {operator} "
            f"with computed statistics {result_slices}. "
            f"This can happen e.g. if the calculation results in inconsistent "
            f"dtypes") from excinfo

    result_cube.data = np.ma.array(result_cube.data)
    result_cube.remove_coord(CONCAT_DIM)
@@ -377,9 +409,24 @@ def _multicube_statistics(cubes, statistics, span):
raise ValueError('Cannot perform multicube statistics '
'for a single cube.')

    # Avoid modifying inputs
    copied_cubes = [cube.copy() for cube in cubes]

    # If all cubes contain a time coordinate, align them. If no cube contains
    # a time coordinate, do nothing. Otherwise, raise an exception.
    time_coords = [cube.coords('time') for cube in cubes]
    if all(time_coords):
        aligned_cubes = _align_time_coord(copied_cubes, span=span)
    elif not any(time_coords):
        aligned_cubes = copied_cubes
    else:
        raise ValueError(
            "Multi-model statistics failed to merge input cubes into a single "
            "array: some cubes have a 'time' dimension, some do not have a "
            "'time' dimension."
        )
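The all/any dispatch above can be illustrated without iris;
``dispatch_time_alignment`` below is a hypothetical helper in which each
entry mimics the result of ``cube.coords('time')`` (a non-empty list if the
cube has a time coordinate, an empty list otherwise):

```python
def dispatch_time_alignment(time_coords):
    """Decide how to treat time coords: 'align', 'passthrough', or error."""
    if all(time_coords):
        return "align"        # every cube has a time coordinate
    if not any(time_coords):
        return "passthrough"  # no cube has one; nothing to align
    # Mixed case: merging would produce an inconsistent concatenation axis.
    raise ValueError("some cubes have a 'time' dimension, some do not")
```

The mixed case must fail early, since merging cubes with and without a time
dimension cannot produce a single consistent array.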

# Calculate statistics
statistics_cubes = {}
for statistic in statistics:
logger.debug('Multicube statistics: computing: %s', statistic)
@@ -439,9 +486,9 @@ def multi_model_statistics(products,
workflow and provenance information, and this option should typically be
ignored.

Cubes must have consistent shapes apart from a potential time dimension.
There are two options to combine time coordinates of different lengths, see
the ``span`` argument.

Uses the statistical operators in :py:mod:`iris.analysis`, including
``mean``, ``median``, ``min``, ``max``, and ``std``. Percentiles are also
@@ -461,7 +508,8 @@
span: str
Overlap or full; if overlap, statistics are computed on the common time
span; if full, statistics are computed on full time spans, ignoring
missing data. This option is ignored if input cubes do not have time
dimensions.
statistics: list
Statistical metrics to be computed, e.g. [``mean``, ``max``]. Choose
from the operators listed in the iris.analysis package. Percentiles can
@@ -471,8 +519,8 @@
preprocessor files as values. If products are passed as input, the
statistics cubes will be assigned to these output products.
groupby: tuple
Group products by a given tag or attribute, e.g., ('project',
'dataset', 'tag1'). This is ignored if ``products`` is a list of cubes.
keep_input_datasets: bool
If True, the output will include the input datasets.
If False, only the computed statistics will be returned.
Expand All @@ -489,7 +537,7 @@ def multi_model_statistics(products,
If span is neither overlap nor full, or if input type is neither cubes
nor products.
"""
    if all(isinstance(p, Cube) for p in products):
return _multicube_statistics(
cubes=products,
statistics=statistics,
@@ -514,9 +562,10 @@

return statistics_products
    raise ValueError(
        f"Input type for multi_model_statistics not understood. Expected "
        f"iris.cube.Cube or esmvalcore.preprocessor.PreprocessorFile, "
        f"got {products}"
    )


def ensemble_statistics(products, statistics,