Multivar aggs #8

Open · wants to merge 2 commits into master
82 changes: 82 additions & 0 deletions docs/iris/example_code/graphics/multidim_aggregation.py
@@ -0,0 +1,82 @@
"""

"""
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress

import iris
from iris.analysis import Aggregator
import iris.quickplot as qplt
from iris.util import rolling_window


def _linfit_coeffs(a_x, a_y, axis):
collapsed_data_shape = list(a_x.shape)
collapsed_data_shape[axis] = 1
x_means = np.average(a_x, axis=axis)
x_bar = x_means.reshape(collapsed_data_shape)
y_means = np.average(a_y, axis=axis)
y_bar = y_means.reshape(collapsed_data_shape)
slope_numerator = np.average(a_x * a_y - x_bar * y_bar, axis=axis)
slope_denominator = np.average(a_x * a_x - x_bar * x_bar, axis=axis)
slopes = slope_numerator / slope_denominator
offsets = y_means - slopes * x_means
return slopes, offsets


# Define a function to perform the custom statistical operation.
# Note: in order to meet the requirements of iris.analysis.Aggregator, it must
# do the calculation over an arbitrary (given) data axis.
def time_correlation_slope(data, times, axis=-1):
    """
    Calculate the slope of a least-squares linear fit of the data values
    against their corresponding time values.

    Generalised to operate on multiple time sequences arranged on a specific
    axis of a multidimensional array.

    Args:

    * data (array):
        raw data.

    * times (array):
        axis values for 'data' points.

    """
    if axis < 0:
        # Convert a negative axis index into its positive equivalent.
        axis += data.ndim

    # An alternative would be scipy.stats.linregress, but that only handles
    # one sequence at a time, so it would need repeating over all the points:
    # slope, intercept, r_coeff, p_value, std_err = linregress(x=times, y=data)
    slope, offset = _linfit_coeffs(a_x=times, a_y=data, axis=axis)
    return slope


def main():
    # Load the whole time-sequence as a single cube.
    file_path = iris.sample_data_path('E1_north_america.nc')
    cube = iris.load_cube(file_path)

    # Make an aggregator from the user function.
    time_unit = cube.coord('time').units
    SLOPE = Aggregator('time_slope',
                       time_correlation_slope,
                       units_func=lambda units: units / time_unit,
                       aux_data_keys='times')

    # Calculate the statistic.
    time_slopes = cube.collapsed('time', SLOPE, times='time')
    time_slopes.rename('Time correlation slopes')
    time_slopes.convert_units('1e-9 K s-1')

    # Plot the results.
    qplt.contourf(time_slopes, cmap='RdYlBu_r')
    plt.gca().coastlines()
    plt.show()


if __name__ == '__main__':
    main()
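As a quick sanity check, the vectorised fit above can be compared against scipy.stats.linregress on synthetic data. This is a minimal sketch, not part of the diff, assuming _linfit_coeffs is importable from the example module (or pasted alongside):

    import numpy as np
    from scipy.stats import linregress

    # Two identical, exactly linear series stacked on a leading axis.
    times = np.arange(10.0)
    data = 2.5 * times + 1.0
    times_2d = np.array([times, times])
    data_2d = np.array([data, data])

    # Vectorised fit along axis 1 (the 'time' axis of the stack).
    slopes, offsets = _linfit_coeffs(a_x=times_2d, a_y=data_2d, axis=1)

    # Reference result from scipy, one series at a time.
    slope, intercept, r_val, p_val, std_err = linregress(x=times, y=data)
    assert np.allclose(slopes, slope)        # both are 2.5
    assert np.allclose(offsets, intercept)   # both are 1.0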
17 changes: 16 additions & 1 deletion lib/iris/analysis/__init__.py
@@ -350,7 +350,7 @@ class Aggregator(object):
     """
 
     def __init__(self, cell_method, call_func, units_func=None,
-                 lazy_func=None, **kwargs):
+                 lazy_func=None, aux_data_keys=None, **kwargs):
         """
         Create an aggregator for the given :data:`call_func`.
 
@@ -375,6 +375,10 @@ def __init__(self, cell_method, call_func, units_func=None,
             Returns an :class:`iris.units.Unit`, or a
             value that can be made into one.
 
+        * aux_data_keys (iterable of string):
+            Names of keywords that, if present, contain additional values for
+            each point of 'data' (and therefore have the same shape as it).
+
         * lazy_func (callable):
             An alternative to :data:`call_func` implementing a lazy
             aggregation. Note that, it need not support all features of the
@@ -401,6 +405,13 @@ def __init__(self, cell_method, call_func, units_func=None,
         self.call_func = call_func
         #: Unit conversion function.
         self.units_func = units_func
+        #: Auxiliary data keywords.
+        self.aux_data_keys = aux_data_keys
+        # Normalise to a list of strings.
+        if self.aux_data_keys is None:
+            self.aux_data_keys = []
+        elif isinstance(self.aux_data_keys, basestring):
+            self.aux_data_keys = [self.aux_data_keys]
         #: Lazy aggregation function.
         self.lazy_func = lazy_func
 
@@ -602,6 +613,10 @@ def __init__(self, cell_method, call_func, units_func=None,
 
         #: A list of keywords that trigger weighted behaviour.
         self._weighting_keywords = ["returned", "weights"]
+        # Record that 'weights' is data-sized (i.e. a weight for every point).
+        # N.B. rolling_window **must ignore this concept** (!)
+        if 'weights' not in self.aux_data_keys:
+            self.aux_data_keys.append('weights')
 
     def uses_weighting(self, **kwargs):
         """
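The normalisation above accepts None, a single name, or a list of names; note that a tuple passed by a user would stay a tuple, which the later append of 'weights' in WeightedAggregator cannot handle. A standalone sketch of the intended behaviour, with a hypothetical helper name and an extra final branch that the diff itself does not include:

    def _normalise_aux_data_keys(aux_data_keys):
        # Mirrors the Aggregator.__init__ normalisation (illustration only),
        # plus a final list() so that any iterable becomes appendable.
        if aux_data_keys is None:
            return []
        elif isinstance(aux_data_keys, basestring):
            return [aux_data_keys]
        return list(aux_data_keys)

    assert _normalise_aux_data_keys(None) == []
    assert _normalise_aux_data_keys('times') == ['times']
    assert _normalise_aux_data_keys(('times', 'scales')) == ['times', 'scales']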
32 changes: 27 additions & 5 deletions lib/iris/cube.py
@@ -2794,11 +2794,33 @@ def collapsed(self, coords, aggregator, lazy=False, **kwargs):
         dims = untouched_dims + dims_to_collapse
         unrolled_data = np.transpose(self.data, dims).reshape(new_shape)
 
-        # Perform the same operation on the weights if applicable
-        if kwargs.get("weights") is not None:
-            weights = kwargs["weights"].view()
-            kwargs["weights"] = np.transpose(weights,
-                                             dims).reshape(new_shape)
+        # # Perform the same operation on the weights if applicable
+        # if kwargs.get("weights") is not None:
+        #     weights = kwargs["weights"].view()
+        #     kwargs["weights"] = np.transpose(weights,
+        #                                      dims).reshape(new_shape)
+
+        # Treat any declared "auxiliary data keys" similarly to 'weights'.
+        # EXCEPT: we allow coords, and will broadcast to the right shape.
+        # TODO: several holes here.  It's proof-of-concept at present.
+        # * should not use private maths function for broadcasting (!)
+        # * needs more care in using coords -- units compatibility ??
+        for aux_key in aggregator.aux_data_keys:
+            aux_data = kwargs.get(aux_key, None)
+            if aux_data is not None:
+                if isinstance(aux_data, basestring):
+                    # Convert a name to a Coord.
+                    aux_data = self.coord(aux_data)
+                if isinstance(aux_data, iris.coords.Coord):
+                    # Convert a Coord to an array.
+                    aux_data = \
+                        iris.analysis.maths._broadcast_cube_coord_data(
+                            self, aux_data, 'addition')
+                # Expand/replicate to cube shape + put back into the key.
+                aux_data, _ = np.broadcast_arrays(aux_data, self.data)
+                aux_data = aux_data.view()
+                aux_data = np.transpose(aux_data, dims).reshape(new_shape)
+                kwargs[aux_key] = aux_data
 
         data_result = aggregator.aggregate(unrolled_data,
                                            axis=-1,
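The heart of the new loop is plain NumPy: the auxiliary values are broadcast up to the cube's shape and then given exactly the transpose/reshape treatment that the data itself receives, so the collapse axis ends up last for both. A standalone sketch under assumed shapes (the reshape to (5, 1, 1) stands in for what _broadcast_cube_coord_data does for a 'time' coord):

    import numpy as np

    # A (time, lat, lon) data array and a 1-D 'times' vector.
    data = np.random.rand(5, 3, 4)
    times = np.arange(5.0).reshape(5, 1, 1)    # aligned with the time axis

    # Expand/replicate the aux values to the full data shape.
    aux, _ = np.broadcast_arrays(times, data)  # -> shape (5, 3, 4)

    # Collapsing 'time': untouched dims first, the collapsed dim last,
    # matching the treatment of the data in Cube.collapsed.
    dims = (1, 2, 0)
    new_shape = (3, 4, 5)
    unrolled_aux = np.transpose(aux, dims).reshape(new_shape)

    # The trailing axis of both arrays is now 'time', ready for axis=-1.
    assert np.allclose(unrolled_aux[1, 2], np.arange(5.0))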
2 changes: 2 additions & 0 deletions lib/iris/tests/unit/cube/test_Cube.py
@@ -135,6 +135,7 @@ def _aggregator(self, uses_weighting):
         # Returns a mock aggregator with a mocked method (uses_weighting)
         # which returns the given True/False condition.
         aggregator = mock.Mock(spec=WeightedAggregator)
+        aggregator.aux_data_keys = []
         aggregator.cell_method = None
         aggregator.uses_weighting = mock.Mock(return_value=uses_weighting)
 
@@ -155,6 +156,7 @@ def _assert_nowarn_collapse_without_weight(self, coords, warn):
     def test_lat_lon_noweighted_aggregator(self):
         # Collapse latitude coordinate with unweighted aggregator.
         aggregator = mock.Mock(spec=Aggregator)
+        aggregator.aux_data_keys = []
         aggregator.cell_method = None
         coords = ['latitude', 'longitude']
 
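A natural follow-on test, not included in this diff, would exercise the aux-key plumbing end to end: collapse a small cube with an aggregator whose call_func asserts that the auxiliary values arrive with the same shape as the unrolled data. A hedged sketch against this branch (check_times is a hypothetical helper; the cube construction is standard iris API):

    import numpy as np
    import iris.coords
    import iris.cube
    from iris.analysis import Aggregator

    # A tiny (time, x) cube with a dimension coordinate for 'time'.
    cube = iris.cube.Cube(np.arange(8.0).reshape(4, 2), long_name='test_data')
    cube.add_dim_coord(iris.coords.DimCoord(np.arange(4.0), long_name='time'), 0)
    cube.add_dim_coord(iris.coords.DimCoord([0.0, 1.0], long_name='x'), 1)

    def check_times(data, times, axis=-1):
        # The collapsed 'time' axis should arrive last, with matching times.
        assert times.shape == data.shape
        return data.mean(axis=axis)

    CHECK = Aggregator('check', check_times, aux_data_keys='times')
    result = cube.collapsed('time', CHECK, times='time')
    assert result.shape == (2,)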