Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++ refactoring: Highlevel non-reducers and improved testing/fixes for reducers. #1206

Merged
merged 8 commits on Jan 7, 2022
2 changes: 1 addition & 1 deletion src/awkward/_v2/_broadcasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def continuation():
else:
raise ValueError(
"cannot broadcast RegularArray of size "
"{0} with RegularArray of size {1}{2}".format(
"{0} with RegularArray of size {1} {2}".format(
x.size, maxsize, in_function(options)
)
)
Expand Down
10 changes: 7 additions & 3 deletions src/awkward/_v2/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1645,6 +1645,7 @@ def _reduce_next(

trimmed = self._content[self.offsets[0] : self.offsets[-1]]
nextstarts = self.offsets[:-1]

outcontent = trimmed._reduce_next(
reducer,
negaxis,
Expand All @@ -1655,6 +1656,7 @@ def _reduce_next(
mask,
keepdims,
)

outoffsets = ak._v2.index.Index64.empty(outlength + 1, self._nplike)
self._handle_error(
self._nplike[
Expand All @@ -1669,9 +1671,11 @@ def _reduce_next(
)
)

if (
keepdims and self._content.dimension_optiontype
): # FIXME not self._represents_regular
represents_regular = getattr(self, "_represents_regular", False)

if keepdims and (
not represents_regular or self._content.dimension_optiontype
):
if isinstance(outcontent, ak._v2.contents.RegularArray):
outcontent = outcontent.toListOffsetArray64(False)

Expand Down
12 changes: 12 additions & 0 deletions src/awkward/_v2/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,18 @@ def _reduce_next(
mask,
keepdims,
):
if len(self._data.shape) != 1 or not self.is_contiguous:
return self.toRegularArray()._reduce_next(
reducer,
negaxis,
starts,
shifts,
parents,
outlength,
mask,
keepdims,
)

out = reducer.apply(self, parents, outlength)

if reducer.needs_position:
Expand Down
6 changes: 4 additions & 2 deletions src/awkward/_v2/contents/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ def merge_parameters(self, parameters):

def toListOffsetArray64(self, start_at_zero=False):
offsets = self._compact_offsets64(start_at_zero)
return self._broadcast_tooffsets64(offsets)
tmp = self._broadcast_tooffsets64(offsets)
tmp._represents_regular = True
return tmp

def toRegularArray(self):
return self
Expand Down Expand Up @@ -900,7 +902,7 @@ def _reduce_next(
)

if not self._content.dimension_optiontype:
branch, depth = self._content.branch_depth
branch, depth = self.branch_depth
convert_shallow = negaxis == depth
convert_deep = negaxis + 2 == depth

Expand Down
166 changes: 88 additions & 78 deletions src/awkward/_v2/operations/reducers/ak_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,87 +16,97 @@ def corr(
mask_identity=True,
flatten_records=False,
):
raise NotImplementedError
"""
Args:
x: One coordinate to use in the correlation (anything #ak.to_layout recognizes).
y: The other coordinate to use in the correlation (anything #ak.to_layout recognizes).
weight: Data that can be broadcasted to `x` and `y` to give each point
a weight. Weighting points equally is the same as no weights;
weighting some points higher increases the significance of those
points. Weights can be zero or negative.
axis (None or int): If None, combine all values from the array into
a single scalar result; if an int, group by that axis: `0` is the
outermost, `1` is the first level of nested lists, etc., and
negative `axis` counts from the innermost: `-1` is the innermost,
`-2` is the next level up, etc.
keepdims (bool): If False, this function decreases the number of
dimensions by 1; if True, the output values are wrapped in a new
length-1 dimension so that the result of this operation may be
broadcasted with the original array.
mask_identity (bool): If True, the application of this function on
empty lists results in None (an option type); otherwise, the
calculation is followed through with the reducers' identities,
usually resulting in floating-point `nan`.
flatten_records (bool): If True, axis=None combines fields from different
records; otherwise, records raise an error.

Computes the correlation of `x` and `y` (many types supported, including
all Awkward Arrays and Records, must be broadcastable to each other).
The grouping is performed the same way as for reducers, though this
operation is not a reducer and has no identity.

# """
# Args:
# x: One coordinate to use in the correlation (anything #ak.to_layout recognizes).
# y: The other coordinate to use in the correlation (anything #ak.to_layout recognizes).
# weight: Data that can be broadcasted to `x` and `y` to give each point
# a weight. Weighting points equally is the same as no weights;
# weighting some points higher increases the significance of those
# points. Weights can be zero or negative.
# axis (None or int): If None, combine all values from the array into
# a single scalar result; if an int, group by that axis: `0` is the
# outermost, `1` is the first level of nested lists, etc., and
# negative `axis` counts from the innermost: `-1` is the innermost,
# `-2` is the next level up, etc.
# keepdims (bool): If False, this function decreases the number of
# dimensions by 1; if True, the output values are wrapped in a new
# length-1 dimension so that the result of this operation may be
# broadcasted with the original array.
# mask_identity (bool): If True, the application of this function on
# empty lists results in None (an option type); otherwise, the
# calculation is followed through with the reducers' identities,
# usually resulting in floating-point `nan`.
# flatten_records (bool): If True, axis=None combines fields from different
# records; otherwise, records raise an error.
This function has no NumPy equivalent.

# Computes the correlation of `x` and `y` (many types supported, including
# all Awkward Arrays and Records, must be broadcastable to each other).
# The grouping is performed the same way as for reducers, though this
# operation is not a reducer and has no identity.
Passing all arguments to the reducers, the correlation is calculated as

# This function has no NumPy equivalent.
ak.sum((x - ak.mean(x))*(y - ak.mean(y))*weight)
/ np.sqrt(ak.sum((x - ak.mean(x))**2 * weight))
/ np.sqrt(ak.sum((y - ak.mean(y))**2 * weight))

# Passing all arguments to the reducers, the correlation is calculated as
See #ak.sum for a complete description of handling nested lists and
missing values (None) in reducers, and #ak.mean for an example with another
non-reducer.
"""
x = ak._v2.highlevel.Array(
ak._v2.operations.convert.to_layout(x, allow_record=False, allow_other=False)
)
y = ak._v2.highlevel.Array(
ak._v2.operations.convert.to_layout(y, allow_record=False, allow_other=False)
)
if weight is not None:
weight = ak._v2.highlevel.Array(
ak._v2.operations.convert.to_layout(
weight, allow_record=False, allow_other=False
)
)

# ak.sum((x - ak.mean(x))*(y - ak.mean(y))*weight)
# / np.sqrt(ak.sum((x - ak.mean(x))**2))
# / np.sqrt(ak.sum((y - ak.mean(y))**2))

# See #ak.sum for a complete description of handling nested lists and
# missing values (None) in reducers, and #ak.mean for an example with another
# non-reducer.
# """
# with np.errstate(invalid="ignore"):
# xmean = mean(
# x, weight=weight, axis=axis, keepdims=keepdims, mask_identity=mask_identity
# )
# ymean = mean(
# y, weight=weight, axis=axis, keepdims=keepdims, mask_identity=mask_identity
# )
# xdiff = x - xmean
# ydiff = y - ymean
# if weight is None:
# sumwxx = sum(
# xdiff ** 2, axis=axis, keepdims=keepdims, mask_identity=mask_identity
# )
# sumwyy = sum(
# ydiff ** 2, axis=axis, keepdims=keepdims, mask_identity=mask_identity
# )
# sumwxy = sum(
# xdiff * ydiff, axis=axis, keepdims=keepdims, mask_identity=mask_identity
# )
# else:
# sumwxx = sum(
# (xdiff ** 2) * weight,
# axis=axis,
# keepdims=keepdims,
# mask_identity=mask_identity,
# )
# sumwyy = sum(
# (ydiff ** 2) * weight,
# axis=axis,
# keepdims=keepdims,
# mask_identity=mask_identity,
# )
# sumwxy = sum(
# (xdiff * ydiff) * weight,
# axis=axis,
# keepdims=keepdims,
# mask_identity=mask_identity,
# )
# nplike = ak.nplike.of(sumwxy, sumwxx, sumwyy)
# return nplike.true_divide(sumwxy, nplike.sqrt(sumwxx * sumwyy))
with np.errstate(invalid="ignore"):
xmean = ak._v2.operations.reducers.mean(
x, weight=weight, axis=axis, keepdims=False, mask_identity=mask_identity
)
ymean = ak._v2.operations.reducers.mean(
y, weight=weight, axis=axis, keepdims=False, mask_identity=mask_identity
)
xdiff = x - xmean
ydiff = y - ymean
if weight is None:
sumwxx = ak._v2.operations.reducers.sum(
xdiff ** 2, axis=axis, keepdims=keepdims, mask_identity=mask_identity
)
sumwyy = ak._v2.operations.reducers.sum(
ydiff ** 2, axis=axis, keepdims=keepdims, mask_identity=mask_identity
)
sumwxy = ak._v2.operations.reducers.sum(
xdiff * ydiff, axis=axis, keepdims=keepdims, mask_identity=mask_identity
)
else:
sumwxx = ak._v2.operations.reducers.sum(
(xdiff ** 2) * weight,
axis=axis,
keepdims=keepdims,
mask_identity=mask_identity,
)
sumwyy = ak._v2.operations.reducers.sum(
(ydiff ** 2) * weight,
axis=axis,
keepdims=keepdims,
mask_identity=mask_identity,
)
sumwxy = ak._v2.operations.reducers.sum(
(xdiff * ydiff) * weight,
axis=axis,
keepdims=keepdims,
mask_identity=mask_identity,
)
nplike = ak.nplike.of(sumwxy, sumwxx, sumwyy)
return nplike.true_divide(sumwxy, nplike.sqrt(sumwxx * sumwyy))
Loading