Multivar aggs #8

Open · wants to merge 2 commits into master
82 changes: 82 additions & 0 deletions docs/iris/example_code/graphics/multidim_aggregation.py
@@ -0,0 +1,82 @@
"""

"""
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress

import iris
from iris.analysis import Aggregator
import iris.quickplot as qplt
from iris.util import rolling_window


def _linfit_coeffs(a_x, a_y, axis):
collapsed_data_shape = list(a_x.shape)
collapsed_data_shape[axis] = 1
x_means = np.average(a_x, axis=axis)
x_bar = x_means.reshape(collapsed_data_shape)
y_means = np.average(a_y, axis=axis)
y_bar = y_means.reshape(collapsed_data_shape)
slope_numerator = np.average(a_x * a_y - x_bar * y_bar, axis=axis)
slope_denominator = np.average(a_x * a_x - x_bar * x_bar, axis=axis)
slopes = slope_numerator / slope_denominator
offsets = y_means - slopes * x_means
return slopes, offsets


# Define a function to perform the custom statistical operation.
# Note: in order to meet the requirements of iris.analysis.Aggregator, it must
# do the calculation over an arbitrary (given) data axis.
def time_correlation_slope(data, times, axis=-1):
    """
    Calculate the slope of a least-squares linear fit of the data values
    against their corresponding time values.

    Generalised to operate on multiple time sequences arranged on a specific
    axis of a multidimensional array.

    Args:

    * data (array):
        raw data.

    * times (array):
        axis values for 'data' points.

    """
    if axis < 0:
        # Convert a negative axis index into its positive equivalent.
        axis += data.ndim

    # An alternative would be scipy.stats.linregress, but that only handles
    # one sequence at a time, so it would need repeating over all the points:
    # slope, intercept, r_coeff, p_value, std_err = linregress(x=times, y=data)
    slope, offset = _linfit_coeffs(a_x=times, a_y=data, axis=axis)
    return slope


def main():
    # Load the whole time-sequence as a single cube.
    file_path = iris.sample_data_path('E1_north_america.nc')
    cube = iris.load_cube(file_path)

    # Make an aggregator from the user function.
    time_unit = cube.coord('time').units
    SLOPE = Aggregator('time_slope',
                       time_correlation_slope,
                       units_func=lambda units: units / time_unit,
                       aux_data_keys='times')

    # Calculate the statistic.
    time_slopes = cube.collapsed('time', SLOPE, times='time')
    time_slopes.rename('Time correlation slopes')
    time_slopes.convert_units('1e-9 K s-1')

    # Plot the results.
    qplt.contourf(time_slopes, cmap='RdYlBu_r')
    plt.gca().coastlines()
    plt.show()


if __name__ == '__main__':
    main()
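As a quick sanity check, the vectorised fit above can be compared against scipy.stats.linregress on synthetic data. This is a minimal sketch, not part of the diff, assuming _linfit_coeffs is importable from the example module (or pasted alongside):

    import numpy as np
    from scipy.stats import linregress

    # Two identical, exactly linear series stacked on a leading axis.
    times = np.arange(10.0)
    data = 2.5 * times + 1.0
    times_2d = np.array([times, times])
    data_2d = np.array([data, data])

    # Vectorised fit along axis 1 (the 'time' axis of the stack).
    slopes, offsets = _linfit_coeffs(a_x=times_2d, a_y=data_2d, axis=1)

    # Reference result from scipy, one series at a time.
    slope, intercept, r_val, p_val, std_err = linregress(x=times, y=data)
    assert np.allclose(slopes, slope)        # both are 2.5
    assert np.allclose(offsets, intercept)   # both are 1.0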
17 changes: 16 additions & 1 deletion lib/iris/analysis/__init__.py
@@ -350,7 +350,7 @@ class Aggregator(object):
     """
 
     def __init__(self, cell_method, call_func, units_func=None,
-                 lazy_func=None, **kwargs):
+                 lazy_func=None, aux_data_keys=None, **kwargs):
         """
         Create an aggregator for the given :data:`call_func`.
 
@@ -375,6 +375,10 @@ def __init__(self, cell_method, call_func, units_func=None,
             Returns an :class:`iris.units.Unit`, or a
             value that can be made into one.
 
+        * aux_data_keys (iterable of string):
+            Names of keywords that, if present, contain additional values for
+            each point of 'data' (and therefore have the same shape as it).
+
         * lazy_func (callable):
             An alternative to :data:`call_func` implementing a lazy
             aggregation. Note that, it need not support all features of the
@@ -401,6 +405,13 @@ def __init__(self, cell_method, call_func, units_func=None,
         self.call_func = call_func
         #: Unit conversion function.
         self.units_func = units_func
+        #: Auxiliary data keywords.
+        self.aux_data_keys = aux_data_keys
+        # Normalise to a list of strings.
+        if self.aux_data_keys is None:
+            self.aux_data_keys = []
+        elif isinstance(self.aux_data_keys, basestring):
+            self.aux_data_keys = [self.aux_data_keys]
         #: Lazy aggregation function.
         self.lazy_func = lazy_func
 
@@ -602,6 +613,10 @@ def __init__(self, cell_method, call_func, units_func=None,
 
         #: A list of keywords that trigger weighted behaviour.
         self._weighting_keywords = ["returned", "weights"]
+        # Record that 'weights' is data-sized (i.e. a weight for every point).
+        # N.B. rolling_window **must ignore this concept** (!)
+        if 'weights' not in self.aux_data_keys:
+            self.aux_data_keys.append('weights')
 
     def uses_weighting(self, **kwargs):
         """
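The normalisation above accepts None, a single name, or a list of names; note that a tuple passed by a user would stay a tuple, which the later append of 'weights' in WeightedAggregator cannot handle. A standalone sketch of the intended behaviour, with a hypothetical helper name and an extra final branch that the diff itself does not include:

    def _normalise_aux_data_keys(aux_data_keys):
        # Mirrors the Aggregator.__init__ normalisation (illustration only),
        # plus a final list() so that any iterable becomes appendable.
        if aux_data_keys is None:
            return []
        elif isinstance(aux_data_keys, basestring):
            return [aux_data_keys]
        return list(aux_data_keys)

    assert _normalise_aux_data_keys(None) == []
    assert _normalise_aux_data_keys('times') == ['times']
    assert _normalise_aux_data_keys(('times', 'scales')) == ['times', 'scales']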
32 changes: 27 additions & 5 deletions lib/iris/cube.py
@@ -2794,11 +2794,33 @@ def collapsed(self, coords, aggregator, lazy=False, **kwargs):
         dims = untouched_dims + dims_to_collapse
         unrolled_data = np.transpose(self.data, dims).reshape(new_shape)
 
-        # Perform the same operation on the weights if applicable
-        if kwargs.get("weights") is not None:
-            weights = kwargs["weights"].view()
-            kwargs["weights"] = np.transpose(weights,
-                                             dims).reshape(new_shape)
+        # # Perform the same operation on the weights if applicable
+        # if kwargs.get("weights") is not None:
+        #     weights = kwargs["weights"].view()
+        #     kwargs["weights"] = np.transpose(weights,
+        #                                      dims).reshape(new_shape)
+
+        # Treat any declared "auxiliary data keys" similarly to 'weights'.
+        # EXCEPT: we allow coords, and will broadcast to the right shape.
+        # TODO: several holes here.  It's proof-of-concept at present.
+        # * should not use private maths function for broadcasting (!)
+        # * needs more care in using coords -- units compatibility ??
+        for aux_key in aggregator.aux_data_keys:
+            aux_data = kwargs.get(aux_key, None)
+            if aux_data is not None:
+                if isinstance(aux_data, basestring):
+                    # Convert a name to a Coord.
+                    aux_data = self.coord(aux_data)
+                if isinstance(aux_data, iris.coords.Coord):
+                    # Convert a Coord to an array.
+                    aux_data = \
+                        iris.analysis.maths._broadcast_cube_coord_data(
+                            self, aux_data, 'addition')
+                # Expand/replicate to cube shape + put back into the key.
+                aux_data, _ = np.broadcast_arrays(aux_data, self.data)
+                aux_data = aux_data.view()
+                aux_data = np.transpose(aux_data, dims).reshape(new_shape)
+                kwargs[aux_key] = aux_data
 
         data_result = aggregator.aggregate(unrolled_data,
                                            axis=-1,
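The heart of the new loop is plain NumPy: the auxiliary values are broadcast up to the cube's shape and then given exactly the transpose/reshape treatment that the data itself receives, so the collapse axis ends up last for both. A standalone sketch under assumed shapes (the reshape to (5, 1, 1) stands in for what _broadcast_cube_coord_data does for a 'time' coord):

    import numpy as np

    # A (time, lat, lon) data array and a 1-D 'times' vector.
    data = np.random.rand(5, 3, 4)
    times = np.arange(5.0).reshape(5, 1, 1)    # aligned with the time axis

    # Expand/replicate the aux values to the full data shape.
    aux, _ = np.broadcast_arrays(times, data)  # -> shape (5, 3, 4)

    # Collapsing 'time': untouched dims first, the collapsed dim last,
    # matching the treatment of the data in Cube.collapsed.
    dims = (1, 2, 0)
    new_shape = (3, 4, 5)
    unrolled_aux = np.transpose(aux, dims).reshape(new_shape)

    # The trailing axis of both arrays is now 'time', ready for axis=-1.
    assert np.allclose(unrolled_aux[1, 2], np.arange(5.0))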
2 changes: 2 additions & 0 deletions lib/iris/tests/unit/cube/test_Cube.py
@@ -135,6 +135,7 @@ def _aggregator(self, uses_weighting):
         # Returns a mock aggregator with a mocked method (uses_weighting)
         # which returns the given True/False condition.
         aggregator = mock.Mock(spec=WeightedAggregator)
+        aggregator.aux_data_keys = []
         aggregator.cell_method = None
         aggregator.uses_weighting = mock.Mock(return_value=uses_weighting)
 
@@ -155,6 +156,7 @@ def _assert_nowarn_collapse_without_weight(self, coords, warn):
     def test_lat_lon_noweighted_aggregator(self):
         # Collapse latitude coordinate with unweighted aggregator.
         aggregator = mock.Mock(spec=Aggregator)
+        aggregator.aux_data_keys = []
         aggregator.cell_method = None
         coords = ['latitude', 'longitude']
 
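A natural follow-on test, not included in this diff, would exercise the aux-key plumbing end to end: collapse a small cube with an aggregator whose call_func asserts that the auxiliary values arrive with the same shape as the unrolled data. A hedged sketch against this branch (check_times is a hypothetical helper; the cube construction is standard iris API):

    import numpy as np
    import iris.coords
    import iris.cube
    from iris.analysis import Aggregator

    # A tiny (time, x) cube with a dimension coordinate for 'time'.
    cube = iris.cube.Cube(np.arange(8.0).reshape(4, 2), long_name='test_data')
    cube.add_dim_coord(iris.coords.DimCoord(np.arange(4.0), long_name='time'), 0)
    cube.add_dim_coord(iris.coords.DimCoord([0.0, 1.0], long_name='x'), 1)

    def check_times(data, times, axis=-1):
        # The collapsed 'time' axis should arrive last, with matching times.
        assert times.shape == data.shape
        return data.mean(axis=axis)

    CHECK = Aggregator('check', check_times, aux_data_keys='times')
    result = cube.collapsed('time', CHECK, times='time')
    assert result.shape == (2,)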