From 64b183eecde49b40721baa68d8b86b503f8306a0 Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Wed, 20 Jul 2022 01:38:14 -0700 Subject: [PATCH 01/11] allow for sigma=0 --- src/pygama/math/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pygama/math/utils.py b/src/pygama/math/utils.py index 2ac380a55..a395d2347 100644 --- a/src/pygama/math/utils.py +++ b/src/pygama/math/utils.py @@ -94,6 +94,10 @@ def get_formatted_stats(mean, sigma, ndigs=2): convenience function for formatting mean +/- sigma to the right number of significant figures. """ + if sigma == 0: + fmt = '%d' % ndigs + fmt = '%#.' + fmt + 'g' + return fmt % mean, fmt % sigma sig_pos = int(np.floor(np.log10(abs(sigma)))) sig_fmt = '%d' % ndigs sig_fmt = '%#.' + sig_fmt + 'g' From ff0e0076e1a4edafc571e3b3741163a090b5bbc4 Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Wed, 20 Jul 2022 01:38:28 -0700 Subject: [PATCH 02/11] fix stats string --- src/pygama/math/histogram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/math/histogram.py b/src/pygama/math/histogram.py index 13b44c695..198702ccc 100644 --- a/src/pygama/math/histogram.py +++ b/src/pygama/math/histogram.py @@ -439,7 +439,7 @@ def plot_hist(hist, bins, var=None, show_stats=False, stats_hloc=0.75, stats_vlo dmean = stddev/np.sqrt(N) mean, dmean = pgu.get_formatted_stats(mean, dmean, 2) - stats = fr'$\mu={mean} \pm {dmean}$\n$\sigma={stddev:#.3g}$' + stats = f'$\mu={mean} \pm {dmean}$\n$\sigma={stddev:#.3g}$' stats_fontsize = rcParams['legend.fontsize'] plt.text(stats_hloc, stats_vloc, stats, transform=plt.gca().transAxes, fontsize = stats_fontsize) From c37f4906c3800077dd232e8cd501528df33c1b0b Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Wed, 20 Jul 2022 01:38:59 -0700 Subject: [PATCH 03/11] change to tcm structure --- src/pygama/evt/build_tcm.py | 2 +- src/pygama/evt/tcm.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pygama/evt/build_tcm.py b/src/pygama/evt/build_tcm.py index 7bdb5957c..cd086fe16 100644 --- a/src/pygama/evt/build_tcm.py +++ b/src/pygama/evt/build_tcm.py @@ -69,7 +69,7 @@ def build_tcm(input_tables:list, coin_col:str, hash_func:str=r'\d+', window_ref=window_ref, array_ids=array_ids) for key in tcm_cols: tcm_cols[key] = lgdo.Array(nda=tcm_cols[key]) - tcm = lgdo.Table(col_dict=tcm_cols, attrs={ 'tables':str(all_tables), 'hash_func':str(hash_func) }) + tcm = lgdo.Struct(obj_dict=tcm_cols, attrs={ 'tables':str(all_tables), 'hash_func':str(hash_func) }) if out_file is not None: store.write_object(tcm, out_name, out_file, wo_mode=wo_mode) diff --git a/src/pygama/evt/tcm.py b/src/pygama/evt/tcm.py index 6ebe1f170..726681b26 100644 --- a/src/pygama/evt/tcm.py +++ b/src/pygama/evt/tcm.py @@ -78,7 +78,9 @@ def generate_tcm_cols(coin_data:list, coin_window:float=0, window_ref:str='last' raise NotImplementedError(f'window_ref {window_ref}') # now build the outputs - coin_idx = tcm.coin_idx.to_numpy() + cumulative_length = np.where(tcm.coin_idx.diff().to_numpy() != 0)[0] + cumulative_length[:-1] = cumulative_length[1:] + cumulative_length[-1] = len(tcm.coin_idx) array_id = tcm.array_id.to_numpy() array_idx = tcm.array_idx.to_numpy() if 'array_idx' in tcm else tcm.index.to_numpy() # beautiful! - return { 'coin_idx':coin_idx, 'array_id':array_id, 'array_idx':array_idx } + return { 'cumulative_length':cumulative_length, 'array_id':array_id, 'array_idx':array_idx } From 373d3d6082d714efe780fbf8af87bbf9de64fb44 Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Wed, 20 Jul 2022 02:19:47 -0700 Subject: [PATCH 04/11] add explode --- src/pygama/lgdo/vectorofvectors.py | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index 77d130b4b..b8211a2a2 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -1,4 +1,5 @@ import numpy as np +from numba import guvectorize from .array import Array from .lgdo_utils import * @@ -135,3 +136,37 @@ def __str__(self): return string def __repr__(self): return str(self) + + @staticmethod + def explode(cumulative_length, *arrays, out_arrays=None): + out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 + if out_arrays is None: + out_arrays = [] + for ii in len(arrays): out_arrays.append(np.empty(out_len, dtype=arrays[ii].dtype)) + for ii, array_in, array_out in enumerate(zip(arrays, out_arrays)): + if len(array) != len(cumulative_length): + raise ValueError(f"array {ii} has len {len(array)} != cl length {len(cumulative_length)}") + if len(array) != len(out_arrays[ii]): + raise ValueError(f"array {ii} has len {len(array)} != out_array length {len(out_array)}") + allocated_explode(cumulative_length, array_in, array_out) + return out_arrays + + +@guvectorize(["void(int64[:], float32[:], float32[:])", + "void(int64[:], float64[:], float64[:])", + "void(int64[:], int32[:], int32[:])", + "void(int64[:], int64[:], int64[:])"], + "(n),()->(n)", nopython=True, cache=True) +def allocated_explode(cumulative_length, array_in, array_out) + if len(cumulative_length) != len(array_in): + return + if cumulative_length[-1] != len(array_out): + return + + ii = 0 + for jj in range(len(array_out)): + while ii < len(cumulative_length) and jj > cumulative_length[ii]: + ii += 1 + array_out[jj] = array_out[ii] + + From af6ae661e5396642bf954a5227499c37f4a3ad78 Mon Sep 17 00:00:00 2001 From: jasondet Date: Wed, 20 Jul 2022 11:45:47 +0200 Subject: [PATCH 05/11] add explode... out_array still broken in numbified --- src/pygama/lgdo/vectorofvectors.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index b8211a2a2..ef6920687 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -142,13 +142,14 @@ def explode(cumulative_length, *arrays, out_arrays=None): out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 if out_arrays is None: out_arrays = [] - for ii in len(arrays): out_arrays.append(np.empty(out_len, dtype=arrays[ii].dtype)) - for ii, array_in, array_out in enumerate(zip(arrays, out_arrays)): - if len(array) != len(cumulative_length): - raise ValueError(f"array {ii} has len {len(array)} != cl length {len(cumulative_length)}") - if len(array) != len(out_arrays[ii]): - raise ValueError(f"array {ii} has len {len(array)} != out_array length {len(out_array)}") - allocated_explode(cumulative_length, array_in, array_out) + for array in arrays: + out_arrays.append(np.empty(out_len, dtype=array.dtype)) + for ii in range(len(arrays)): + if len(arrays[ii]) != len(cumulative_length): + raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}") + if cumulative_length[-1] != len(out_arrays[ii]): + raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}") + allocated_explode(cumulative_length, arrays[ii], out_arrays[ii]) return out_arrays @@ -156,17 +157,16 @@ def explode(cumulative_length, *arrays, out_arrays=None): "void(int64[:], float64[:], float64[:])", "void(int64[:], int32[:], int32[:])", "void(int64[:], int64[:], int64[:])"], - "(n),()->(n)", nopython=True, cache=True) -def allocated_explode(cumulative_length, array_in, array_out) - if len(cumulative_length) != len(array_in): + "(n),(n),(m)", nopython=True, cache=True) +def allocated_explode(cumulative_length, array_in, array_out): + if len(cumulative_length) != len(array_in) or cumulative_length[-1] != len(array_out): + for jj in range(len(array_out)): + array_out[jj] = np.NaN return - if cumulative_length[-1] != len(array_out): - return - ii = 0 for jj in range(len(array_out)): - while ii < len(cumulative_length) and jj > cumulative_length[ii]: + while ii < len(cumulative_length) and jj >= cumulative_length[ii]: ii += 1 - array_out[jj] = array_out[ii] + array_out[jj] = array_in[ii] From 42ae80260002748b0e6b9b060bf7f765116e239c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Jul 2022 09:48:21 +0000 Subject: [PATCH 06/11] style: pre-commit fixes --- src/pygama/lgdo/vectorofvectors.py | 6 ++---- src/pygama/math/histogram.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index ef6920687..7c63d75c7 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -142,7 +142,7 @@ def explode(cumulative_length, *arrays, out_arrays=None): out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 if out_arrays is None: out_arrays = [] - for array in arrays: + for array in arrays: out_arrays.append(np.empty(out_len, dtype=array.dtype)) for ii in range(len(arrays)): if len(arrays[ii]) != len(cumulative_length): @@ -165,8 +165,6 @@ def allocated_explode(cumulative_length, array_in, array_out): return ii = 0 for jj in range(len(array_out)): - while ii < len(cumulative_length) and jj >= cumulative_length[ii]: + while ii < len(cumulative_length) and jj >= cumulative_length[ii]: ii += 1 array_out[jj] = array_in[ii] - - diff --git a/src/pygama/math/histogram.py b/src/pygama/math/histogram.py index 198702ccc..0b8bfc433 100644 --- a/src/pygama/math/histogram.py +++ b/src/pygama/math/histogram.py @@ -439,7 +439,7 @@ def plot_hist(hist, bins, var=None, show_stats=False, stats_hloc=0.75, stats_vlo dmean = stddev/np.sqrt(N) mean, dmean = pgu.get_formatted_stats(mean, dmean, 2) - stats = f'$\mu={mean} \pm {dmean}$\n$\sigma={stddev:#.3g}$' + stats = f'$\\mu={mean} \\pm {dmean}$\n$\\sigma={stddev:#.3g}$' stats_fontsize = rcParams['legend.fontsize'] plt.text(stats_hloc, stats_vloc, stats, transform=plt.gca().transAxes, fontsize = stats_fontsize) From 72b037622063e6982b4eea5a2f6e2c9c06ba1b09 Mon Sep 17 00:00:00 2001 From: jasondet Date: Sun, 24 Jul 2022 23:56:19 +0200 Subject: [PATCH 07/11] explode edits and add group_by --- src/pygama/lgdo/vectorofvectors.py | 71 ++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index ef6920687..cf326a38a 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -1,5 +1,5 @@ import numpy as np -from numba import guvectorize +from numba import jit from .array import Array from .lgdo_utils import * @@ -137,31 +137,41 @@ def __str__(self): def __repr__(self): return str(self) - @staticmethod - def explode(cumulative_length, *arrays, out_arrays=None): - out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 - if out_arrays is None: - out_arrays = [] - for array in arrays: - out_arrays.append(np.empty(out_len, dtype=array.dtype)) - for ii in range(len(arrays)): - if len(arrays[ii]) != len(cumulative_length): - raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}") - if cumulative_length[-1] != len(out_arrays[ii]): - raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}") - allocated_explode(cumulative_length, arrays[ii], out_arrays[ii]) - return out_arrays - - -@guvectorize(["void(int64[:], float32[:], float32[:])", - "void(int64[:], float64[:], float64[:])", - "void(int64[:], int32[:], int32[:])", - "void(int64[:], int64[:], int64[:])"], - "(n),(n),(m)", nopython=True, cache=True) + +@jit(nopython=True) +def allocated_group_by(sorted_array_in, cumulative_length_out): + if len(cumulative_length_out) == 0 and len(sorted_array_in) > 0: + raise ValueError("cumulative_length_out too short ({len(cumulative_length_out)})") + cumulative_length_out.fill(0) + ii = 0 + last_val = sorted_array_in[0] + for val in sorted_array_in: + if val != last_val: + ii += 1 + cumulative_length_out[ii] = cumulative_length_out[ii-1] + if ii >= len(cumulative_length_out): + raise RuntimeError("cumulative_length_out too short ({len(cumulative_length_out)})") + return + last_val = val + cumulative_length_out[ii] += 1 + ii += 1 + return cumulative_length_out[:ii] + +@jit(nopython=True) +def allocated_explode_cl(cumulative_length, array_out): + if cumulative_length[-1] != len(array_out): + raise ValueWarning(f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})") + return + start = 0 + for ii in range(len(cumulative_length)): + for jj in range(cumulative_length[ii]): + array_out[start+jj] = ii + start = cumulative_length[ii] + +@jit(nopython=True) def allocated_explode(cumulative_length, array_in, array_out): if len(cumulative_length) != len(array_in) or cumulative_length[-1] != len(array_out): - for jj in range(len(array_out)): - array_out[jj] = np.NaN + raise ValueWarning(f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})") return ii = 0 for jj in range(len(array_out)): @@ -169,4 +179,17 @@ def allocated_explode(cumulative_length, array_in, array_out): ii += 1 array_out[jj] = array_in[ii] +def explode_arrays(cumulative_length, *arrays, out_arrays=None): + out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 + if out_arrays is None: + out_arrays = [] + for array in arrays: + out_arrays.append(np.empty(out_len, dtype=array.dtype)) + for ii in range(len(arrays)): + if len(arrays[ii]) != len(cumulative_length): + raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}") + if cumulative_length[-1] != len(out_arrays[ii]): + raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}") + allocated_explode(cumulative_length, arrays[ii], out_arrays[ii]) + return out_arrays From 75bea6b53d6c38ec512c0df56f666d5755e837fa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 24 Jul 2022 21:58:25 +0000 Subject: [PATCH 08/11] style: pre-commit fixes --- src/pygama/lgdo/vectorofvectors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index 9977c79fc..ab92f8afa 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -197,7 +197,7 @@ def explode_arrays(cumulative_length, *arrays, out_arrays=None): out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 if out_arrays is None: out_arrays = [] - for array in arrays: + for array in arrays: out_arrays.append(np.empty(out_len, dtype=array.dtype)) for ii in range(len(arrays)): if len(arrays[ii]) != len(cumulative_length): @@ -206,4 +206,3 @@ def explode_arrays(cumulative_length, *arrays, out_arrays=None): raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}") allocated_explode(cumulative_length, arrays[ii], out_arrays[ii]) return out_arrays - From e12f70dd7110711ba1263d50904b85846c7a47f9 Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Tue, 26 Jul 2022 08:32:30 -0700 Subject: [PATCH 09/11] edits to build_cl and explodes after testing --- src/pygama/lgdo/__init__.py | 2 +- src/pygama/lgdo/vectorofvectors.py | 162 ++++++++++++++++++++++++----- 2 files changed, 139 insertions(+), 25 deletions(-) diff --git a/src/pygama/lgdo/__init__.py b/src/pygama/lgdo/__init__.py index 2798f8f04..019a9141c 100644 --- a/src/pygama/lgdo/__init__.py +++ b/src/pygama/lgdo/__init__.py @@ -36,5 +36,5 @@ from pygama.lgdo.scalar import Scalar from pygama.lgdo.struct import Struct from pygama.lgdo.table import Table -from pygama.lgdo.vectorofvectors import VectorOfVectors +from pygama.lgdo.vectorofvectors import VectorOfVectors, build_cl, explode_cl, explode, explode_arrays from pygama.lgdo.waveform_table import WaveformTable diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index ab92f8afa..45a16d808 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -8,7 +8,7 @@ from typing import Any import numpy as np -from numba import jit +from numba import jit, njit from pygama.lgdo.array import Array from pygama.lgdo.lgdo_utils import get_element_type @@ -152,11 +152,47 @@ def __str__(self) -> str: def __repr__(self) -> str: return str(self) -@jit(nopython=True) -def allocated_group_by(sorted_array_in, cumulative_length_out): + +def build_cl(sorted_array_in : Array, cumulative_length_out : np.ndarray = None) -> np.ndarray: + """ build a cumulative_length array from an array of sorted data + + So for example if sorted_array_in contains [ 3, 3, 3, 4 ], would return + [ 2, 3 ] + + For a sorted_array_in of indices, this is the inverse of explode_cl() below, + in the sense that doing build_cl(explode_cl(cumulative_length)) would + recover the original cumulative_length. + + Parameters + ---------- + sorted_array_in + Array of data already sorted; each N matching contiguous entries will be + converted into a new row of cumulative_length_out + cumulative_length_out + This is an optional pre-allocated array for the output + cumulative_length. It will always have length <= sorted_array_in, so + giving them the same length is safe if there is not a better guess. + + Returns + ------- + cumulative_length_out + The output cumulative_length. If the user provides a + cumulative_length_out that is too long, this return value is sliced to + contain only the used portion of the allocated memory + """ + if len(sorted_array_in) == 0: return None + sorted_array_in = np.asarray(sorted_array_in) + if cumulative_length_out is None: + cumulative_length_out = np.zeros(len(sorted_array_in), dtype=np.uint64) + else: + cumulative_length_out.fill(0) if len(cumulative_length_out) == 0 and len(sorted_array_in) > 0: raise ValueError("cumulative_length_out too short ({len(cumulative_length_out)})") - cumulative_length_out.fill(0) + return nb_build_cl(sorted_array_in, cumulative_length_out) + +@njit +def nb_build_cl(sorted_array_in : np.ndarray, cumulative_length_out : np.ndarray) -> np.ndarray: + """ numbified inner loop for build_cl """ ii = 0 last_val = sorted_array_in[0] for val in sorted_array_in: @@ -164,45 +200,123 @@ def allocated_group_by(sorted_array_in, cumulative_length_out): ii += 1 cumulative_length_out[ii] = cumulative_length_out[ii-1] if ii >= len(cumulative_length_out): - raise RuntimeError("cumulative_length_out too short ({len(cumulative_length_out)})") - return + raise RuntimeError("cumulative_length_out too short") last_val = val cumulative_length_out[ii] += 1 ii += 1 return cumulative_length_out[:ii] -@jit(nopython=True) -def allocated_explode_cl(cumulative_length, array_out): - if cumulative_length[-1] != len(array_out): - raise ValueWarning(f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})") - return + +def explode_cl(cumulative_length : Array, array_out : np.ndarray = None) -> np.ndarray: + """ explode a cumulative_length array + + So for example if cumulative_length is [ 2, 3 ], would return [ 0, 0, 0, 1] + + This is the inverse of build_cl() above, in the sense that doing + build_cl(explode_cl(cumulative_length)) would recover the original + cumulative_length. + + Parameters + ---------- + cumulative_length + the cumulative_length array to be exploded + array_out + an optional pre-allocated array to hold the exploded cumulative_length. + The length should be equal to cumulative_length[-1] + + Returns + ------- + array_out + the exploded cumulative_length array + """ + cumulative_length = np.asarray(cumulative_length) + out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 + if array_out is None: + array_out = np.empty(int(out_len), dtype=np.uint64) + if len(array_out) != out_len: + raise ValueError(f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})") + return nb_explode_cl(cumulative_length, array_out) + +@njit +def nb_explode_cl(cumulative_length : np.ndarray, array_out : np.ndarray) -> np.ndarray: + """ numbified inner loop for explode_cl""" + out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 + if len(array_out) != out_len: + raise ValueError("bad lengths") start = 0 for ii in range(len(cumulative_length)): - for jj in range(cumulative_length[ii]): - array_out[start+jj] = ii + nn = int(cumulative_length[ii] - start) + for jj in range(nn): + array_out[int(start+jj)] = ii start = cumulative_length[ii] + return array_out + + + +def explode(cumulative_length : Array, array_in : Array, array_out : np.ndarray = None) -> np.ndarray : + """ explode a data array using a cumulative_length array + + This is identical to allocated_explode_cl, except array_in gets exploded + instead of cumulative_length. So for example, if array_in = [ 3, 4 ] and + cumulative_length = [ 2, 3 ], array_out would be [ 3, 3, 3, 4 ] + + Parameters + ---------- + cumulative_length + the cumulative_length array to use for exploding + array_in + the data to be exploded. Must have same length as cumulative_length + array_out + a pre-allocated array to hold the exploded data. The length should be + equal to cumulative_length[-1] + """ + cumulative_length = np.asarray(cumulative_length) + array_in = np.asarray(array_in) + out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 + if array_out is None: + array_out = np.empty(out_len, dtype=array_in.dtype) + if len(cumulative_length) != len(array_in) or len(array_out) != out_len: + raise ValueError(f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})") + return nb_explode(cumulative_length, array_in, array_out) -@jit(nopython=True) -def allocated_explode(cumulative_length, array_in, array_out): - if len(cumulative_length) != len(array_in) or cumulative_length[-1] != len(array_out): - raise ValueWarning(f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})") - return +@njit +def nb_explode(cumulative_length : np.ndarray, array_in : np.ndarray, array_out : np.ndarray) -> np.ndarray : + """ numbified inner loop for explode""" + out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 + if len(cumulative_length) != len(array_in) or len(array_out) != out_len: + raise ValueError("bad lengths") ii = 0 for jj in range(len(array_out)): while ii < len(cumulative_length) and jj >= cumulative_length[ii]: ii += 1 array_out[jj] = array_in[ii] + return array_out + + +def explode_arrays(cumulative_length : Array, arrays : list, out_arrays : list = None) -> list: + """ explode a set of arrays using a cumulative_length array -def explode_arrays(cumulative_length, *arrays, out_arrays=None): + Parameters + ---------- + cumulative_length + the cumulative_length array to use for exploding + arrays + the data arrays to be exploded. Each array must have same length as + cumulative_length + array_out + an optional list of pre-allocated arrays to hold the exploded data. The + length of the list should be equal to the number of "arrays", and each + entry in array_out should have length cumulative_length[-1]. If not + provided, output arrays are allocated for the user. + """ + cumulative_length = np.asarray(cumulative_length) + for ii in range(len(arrays)): + arrays[ii] = np.asarray(arrays[ii]) out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0 if out_arrays is None: out_arrays = [] for array in arrays: out_arrays.append(np.empty(out_len, dtype=array.dtype)) for ii in range(len(arrays)): - if len(arrays[ii]) != len(cumulative_length): - raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}") - if cumulative_length[-1] != len(out_arrays[ii]): - raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}") - allocated_explode(cumulative_length, arrays[ii], out_arrays[ii]) + explode(cumulative_length, arrays[ii], out_arrays[ii]) return out_arrays From 1da0f745084cf7467a78738a46a50068f7bddd74 Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Tue, 26 Jul 2022 08:32:58 -0700 Subject: [PATCH 10/11] add test_build_cl_and_explodes() --- tests/lgdo/test_vectorofvectors.py | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/lgdo/test_vectorofvectors.py b/tests/lgdo/test_vectorofvectors.py index 7a9a370f0..6aa9ae0b7 100644 --- a/tests/lgdo/test_vectorofvectors.py +++ b/tests/lgdo/test_vectorofvectors.py @@ -64,3 +64,39 @@ def test_iter(lgdo_vov): for v in lgdo_vov: assert (v == desired[c]).all() c += 1 + + +def test_build_cl_and_explodes(): + cl = np.array([3, 4], dtype=np.uint64) + exp = np.array([0, 0, 0, 1], dtype=np.uint64) + array = np.array([5, 7], dtype=np.uint64) + array_exp = np.array([5, 5, 5, 7], dtype=np.uint64) + # build_cl + assert (lgdo.build_cl(exp, cl) == cl).all() + assert (lgdo.build_cl(exp) == cl).all() + assert (lgdo.build_cl([0, 0, 0, 1]) == cl).all() + assert (lgdo.build_cl(array_exp, cl) == cl).all() + assert (lgdo.build_cl(array_exp) == cl).all() + assert (lgdo.build_cl([5, 5, 5, 7]) == cl).all() + # explode_cl + assert (lgdo.explode_cl(cl, exp) == exp).all() + assert (lgdo.explode_cl(cl) == exp).all() + assert (lgdo.explode_cl([3,4]) == exp).all() + # inverse functionality + assert (lgdo.build_cl(lgdo.explode_cl(cl)) == cl).all() + assert (lgdo.explode_cl(lgdo.build_cl(array_exp)) == exp).all() + # explode + assert (lgdo.explode(cl, array, array_exp) == array_exp).all() + assert (lgdo.explode(cl, array) == array_exp).all() + assert (lgdo.explode([3, 4], [5, 7]) == array_exp).all() + assert (lgdo.explode(cl, range(len(cl))) == exp).all() + # explode_arrays + out_arrays = lgdo.explode_arrays(cl, [array, range(len(cl))]) + assert len(out_arrays) == 2 + assert (out_arrays[0] == array_exp).all() + assert (out_arrays[1] == exp).all() + out_arrays = lgdo.explode_arrays(cl, [array, range(len(cl))], out_arrays=out_arrays) + assert len(out_arrays) == 2 + assert (out_arrays[0] == array_exp).all() + assert (out_arrays[1] == exp).all() + From 3b6cdf3f86d34b6ad34a197c9c0f796c98d0b9ee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Jul 2022 15:34:16 +0000 Subject: [PATCH 11/11] style: pre-commit fixes --- src/pygama/lgdo/__init__.py | 8 +++++++- src/pygama/lgdo/vectorofvectors.py | 6 +++--- tests/lgdo/test_vectorofvectors.py | 1 - 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/pygama/lgdo/__init__.py b/src/pygama/lgdo/__init__.py index 019a9141c..4a4bec867 100644 --- a/src/pygama/lgdo/__init__.py +++ b/src/pygama/lgdo/__init__.py @@ -36,5 +36,11 @@ from pygama.lgdo.scalar import Scalar from pygama.lgdo.struct import Struct from pygama.lgdo.table import Table -from pygama.lgdo.vectorofvectors import VectorOfVectors, build_cl, explode_cl, explode, explode_arrays +from pygama.lgdo.vectorofvectors import ( + VectorOfVectors, + build_cl, + explode, + explode_arrays, + explode_cl, +) from pygama.lgdo.waveform_table import WaveformTable diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py index 45a16d808..791b631ef 100644 --- a/src/pygama/lgdo/vectorofvectors.py +++ b/src/pygama/lgdo/vectorofvectors.py @@ -156,7 +156,7 @@ def __repr__(self) -> str: def build_cl(sorted_array_in : Array, cumulative_length_out : np.ndarray = None) -> np.ndarray: """ build a cumulative_length array from an array of sorted data - So for example if sorted_array_in contains [ 3, 3, 3, 4 ], would return + So for example if sorted_array_in contains [ 3, 3, 3, 4 ], would return [ 2, 3 ] For a sorted_array_in of indices, this is the inverse of explode_cl() below, @@ -182,7 +182,7 @@ def build_cl(sorted_array_in : Array, cumulative_length_out : np.ndarray = None) """ if len(sorted_array_in) == 0: return None sorted_array_in = np.asarray(sorted_array_in) - if cumulative_length_out is None: + if cumulative_length_out is None: cumulative_length_out = np.zeros(len(sorted_array_in), dtype=np.uint64) else: cumulative_length_out.fill(0) @@ -303,7 +303,7 @@ def explode_arrays(cumulative_length : Array, arrays : list, out_arrays : list = arrays the data arrays to be exploded. Each array must have same length as cumulative_length - array_out + out_arrays an optional list of pre-allocated arrays to hold the exploded data. The length of the list should be equal to the number of "arrays", and each entry in array_out should have length cumulative_length[-1]. If not diff --git a/tests/lgdo/test_vectorofvectors.py b/tests/lgdo/test_vectorofvectors.py index 6aa9ae0b7..0d18ecf10 100644 --- a/tests/lgdo/test_vectorofvectors.py +++ b/tests/lgdo/test_vectorofvectors.py @@ -99,4 +99,3 @@ def test_build_cl_and_explodes(): assert len(out_arrays) == 2 assert (out_arrays[0] == array_exp).all() assert (out_arrays[1] == exp).all() -