From 64b183eecde49b40721baa68d8b86b503f8306a0 Mon Sep 17 00:00:00 2001
From: Jason Detwiler <jasondet@uw.edu>
Date: Wed, 20 Jul 2022 01:38:14 -0700
Subject: [PATCH 01/11] allow for sigma=0

---
 src/pygama/math/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/pygama/math/utils.py b/src/pygama/math/utils.py
index 2ac380a55..a395d2347 100644
--- a/src/pygama/math/utils.py
+++ b/src/pygama/math/utils.py
@@ -94,6 +94,10 @@ def get_formatted_stats(mean, sigma, ndigs=2):
     convenience function for formatting mean +/- sigma to the right number of
     significant figures.
     """
+    if sigma == 0:
+        fmt = '%d' % ndigs
+        fmt = '%#.' + fmt + 'g'
+        return fmt % mean, fmt % sigma
     sig_pos = int(np.floor(np.log10(abs(sigma))))
     sig_fmt = '%d' % ndigs
     sig_fmt = '%#.' + sig_fmt + 'g'

From ff0e0076e1a4edafc571e3b3741163a090b5bbc4 Mon Sep 17 00:00:00 2001
From: Jason Detwiler <jasondet@uw.edu>
Date: Wed, 20 Jul 2022 01:38:28 -0700
Subject: [PATCH 02/11] fix stats string

---
 src/pygama/math/histogram.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pygama/math/histogram.py b/src/pygama/math/histogram.py
index 13b44c695..198702ccc 100644
--- a/src/pygama/math/histogram.py
+++ b/src/pygama/math/histogram.py
@@ -439,7 +439,7 @@ def plot_hist(hist, bins, var=None, show_stats=False, stats_hloc=0.75, stats_vlo
         dmean = stddev/np.sqrt(N)
 
         mean, dmean = pgu.get_formatted_stats(mean, dmean, 2)
-        stats = fr'$\mu={mean} \pm {dmean}$\n$\sigma={stddev:#.3g}$'
+        stats = f'$\mu={mean} \pm {dmean}$\n$\sigma={stddev:#.3g}$'
         stats_fontsize = rcParams['legend.fontsize']
         plt.text(stats_hloc, stats_vloc, stats, transform=plt.gca().transAxes, fontsize = stats_fontsize)
 

From c37f4906c3800077dd232e8cd501528df33c1b0b Mon Sep 17 00:00:00 2001
From: Jason Detwiler <jasondet@uw.edu>
Date: Wed, 20 Jul 2022 01:38:59 -0700
Subject: [PATCH 03/11] change to tcm structure

---
 src/pygama/evt/build_tcm.py | 2 +-
 src/pygama/evt/tcm.py       | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/pygama/evt/build_tcm.py b/src/pygama/evt/build_tcm.py
index 7bdb5957c..cd086fe16 100644
--- a/src/pygama/evt/build_tcm.py
+++ b/src/pygama/evt/build_tcm.py
@@ -69,7 +69,7 @@ def build_tcm(input_tables:list, coin_col:str, hash_func:str=r'\d+',
                                       window_ref=window_ref, array_ids=array_ids)
 
     for key in tcm_cols: tcm_cols[key] = lgdo.Array(nda=tcm_cols[key])
-    tcm = lgdo.Table(col_dict=tcm_cols, attrs={ 'tables':str(all_tables), 'hash_func':str(hash_func) })
+    tcm = lgdo.Struct(obj_dict=tcm_cols, attrs={ 'tables':str(all_tables), 'hash_func':str(hash_func) })
 
     if out_file is not None:
         store.write_object(tcm, out_name, out_file, wo_mode=wo_mode)
diff --git a/src/pygama/evt/tcm.py b/src/pygama/evt/tcm.py
index 6ebe1f170..726681b26 100644
--- a/src/pygama/evt/tcm.py
+++ b/src/pygama/evt/tcm.py
@@ -78,7 +78,9 @@ def generate_tcm_cols(coin_data:list, coin_window:float=0, window_ref:str='last'
         raise NotImplementedError(f'window_ref {window_ref}')
 
     # now build the outputs
-    coin_idx = tcm.coin_idx.to_numpy()
+    cumulative_length = np.where(tcm.coin_idx.diff().to_numpy() != 0)[0]
+    cumulative_length[:-1] = cumulative_length[1:]
+    cumulative_length[-1] = len(tcm.coin_idx)
     array_id = tcm.array_id.to_numpy()
     array_idx = tcm.array_idx.to_numpy() if 'array_idx' in tcm else tcm.index.to_numpy() # beautiful!
-    return { 'coin_idx':coin_idx, 'array_id':array_id, 'array_idx':array_idx }
+    return { 'cumulative_length':cumulative_length, 'array_id':array_id, 'array_idx':array_idx }

From 373d3d6082d714efe780fbf8af87bbf9de64fb44 Mon Sep 17 00:00:00 2001
From: Jason Detwiler <jasondet@uw.edu>
Date: Wed, 20 Jul 2022 02:19:47 -0700
Subject: [PATCH 04/11] add explode

---
 src/pygama/lgdo/vectorofvectors.py | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index 77d130b4b..b8211a2a2 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -1,4 +1,5 @@
 import numpy as np
+from numba import guvectorize
 
 from .array import Array
 from .lgdo_utils import *
@@ -135,3 +136,37 @@ def __str__(self):
         return string
 
     def __repr__(self): return str(self)
+
+    @staticmethod
+    def explode(cumulative_length, *arrays, out_arrays=None):
+        out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+        if out_arrays is None:
+            out_arrays = []
+            for ii in len(arrays): out_arrays.append(np.empty(out_len, dtype=arrays[ii].dtype))
+        for ii, array_in, array_out in enumerate(zip(arrays, out_arrays)):
+            if len(array) != len(cumulative_length):
+                raise ValueError(f"array {ii} has len {len(array)} != cl length {len(cumulative_length)}")
+            if len(array) != len(out_arrays[ii]):
+                raise ValueError(f"array {ii} has len {len(array)} != out_array length {len(out_array)}")
+            allocated_explode(cumulative_length, array_in, array_out)
+        return out_arrays
+
+
+@guvectorize(["void(int64[:], float32[:], float32[:])",
+              "void(int64[:], float64[:], float64[:])",
+              "void(int64[:], int32[:], int32[:])",
+              "void(int64[:], int64[:], int64[:])"],
+              "(n),()->(n)", nopython=True, cache=True)
+def allocated_explode(cumulative_length, array_in, array_out)
+    if len(cumulative_length) != len(array_in):
+        return
+    if cumulative_length[-1] != len(array_out):
+        return
+
+    ii = 0
+    for jj in range(len(array_out)):
+        while ii < len(cumulative_length) and jj > cumulative_length[ii]: 
+            ii += 1
+        array_out[jj] = array_out[ii]
+
+

From af6ae661e5396642bf954a5227499c37f4a3ad78 Mon Sep 17 00:00:00 2001
From: jasondet <jasondet@gmail.com>
Date: Wed, 20 Jul 2022 11:45:47 +0200
Subject: [PATCH 05/11] add explode... out_array still broken in numbified

---
 src/pygama/lgdo/vectorofvectors.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index b8211a2a2..ef6920687 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -142,13 +142,14 @@ def explode(cumulative_length, *arrays, out_arrays=None):
         out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
         if out_arrays is None:
             out_arrays = []
-            for ii in len(arrays): out_arrays.append(np.empty(out_len, dtype=arrays[ii].dtype))
-        for ii, array_in, array_out in enumerate(zip(arrays, out_arrays)):
-            if len(array) != len(cumulative_length):
-                raise ValueError(f"array {ii} has len {len(array)} != cl length {len(cumulative_length)}")
-            if len(array) != len(out_arrays[ii]):
-                raise ValueError(f"array {ii} has len {len(array)} != out_array length {len(out_array)}")
-            allocated_explode(cumulative_length, array_in, array_out)
+            for array in arrays: 
+                out_arrays.append(np.empty(out_len, dtype=array.dtype))
+        for ii in range(len(arrays)):
+            if len(arrays[ii]) != len(cumulative_length):
+                raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}")
+            if cumulative_length[-1] != len(out_arrays[ii]):
+                raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}")
+            allocated_explode(cumulative_length, arrays[ii], out_arrays[ii])
         return out_arrays
 
 
@@ -156,17 +157,16 @@ def explode(cumulative_length, *arrays, out_arrays=None):
               "void(int64[:], float64[:], float64[:])",
               "void(int64[:], int32[:], int32[:])",
               "void(int64[:], int64[:], int64[:])"],
-              "(n),()->(n)", nopython=True, cache=True)
-def allocated_explode(cumulative_length, array_in, array_out)
-    if len(cumulative_length) != len(array_in):
+              "(n),(n),(m)", nopython=True, cache=True)
+def allocated_explode(cumulative_length, array_in, array_out):
+    if len(cumulative_length) != len(array_in) or cumulative_length[-1] != len(array_out):
+        for jj in range(len(array_out)):
+            array_out[jj] = np.NaN
         return
-    if cumulative_length[-1] != len(array_out):
-        return
-
     ii = 0
     for jj in range(len(array_out)):
-        while ii < len(cumulative_length) and jj > cumulative_length[ii]: 
+        while ii < len(cumulative_length) and jj >= cumulative_length[ii]: 
             ii += 1
-        array_out[jj] = array_out[ii]
+        array_out[jj] = array_in[ii]
 
 

From 42ae80260002748b0e6b9b060bf7f765116e239c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Jul 2022 09:48:21 +0000
Subject: [PATCH 06/11] style: pre-commit fixes

---
 src/pygama/lgdo/vectorofvectors.py | 6 ++----
 src/pygama/math/histogram.py       | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index ef6920687..7c63d75c7 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -142,7 +142,7 @@ def explode(cumulative_length, *arrays, out_arrays=None):
         out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
         if out_arrays is None:
             out_arrays = []
-            for array in arrays: 
+            for array in arrays:
                 out_arrays.append(np.empty(out_len, dtype=array.dtype))
         for ii in range(len(arrays)):
             if len(arrays[ii]) != len(cumulative_length):
@@ -165,8 +165,6 @@ def allocated_explode(cumulative_length, array_in, array_out):
         return
     ii = 0
     for jj in range(len(array_out)):
-        while ii < len(cumulative_length) and jj >= cumulative_length[ii]: 
+        while ii < len(cumulative_length) and jj >= cumulative_length[ii]:
             ii += 1
         array_out[jj] = array_in[ii]
-
-
diff --git a/src/pygama/math/histogram.py b/src/pygama/math/histogram.py
index 198702ccc..0b8bfc433 100644
--- a/src/pygama/math/histogram.py
+++ b/src/pygama/math/histogram.py
@@ -439,7 +439,7 @@ def plot_hist(hist, bins, var=None, show_stats=False, stats_hloc=0.75, stats_vlo
         dmean = stddev/np.sqrt(N)
 
         mean, dmean = pgu.get_formatted_stats(mean, dmean, 2)
-        stats = f'$\mu={mean} \pm {dmean}$\n$\sigma={stddev:#.3g}$'
+        stats = f'$\\mu={mean} \\pm {dmean}$\n$\\sigma={stddev:#.3g}$'
         stats_fontsize = rcParams['legend.fontsize']
         plt.text(stats_hloc, stats_vloc, stats, transform=plt.gca().transAxes, fontsize = stats_fontsize)
 

From 72b037622063e6982b4eea5a2f6e2c9c06ba1b09 Mon Sep 17 00:00:00 2001
From: jasondet <jasondet@gmail.com>
Date: Sun, 24 Jul 2022 23:56:19 +0200
Subject: [PATCH 07/11] explode edits and add group_by

---
 src/pygama/lgdo/vectorofvectors.py | 71 ++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 24 deletions(-)

diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index ef6920687..cf326a38a 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -1,5 +1,5 @@
 import numpy as np
-from numba import guvectorize
+from numba import jit
 
 from .array import Array
 from .lgdo_utils import *
@@ -137,31 +137,41 @@ def __str__(self):
 
     def __repr__(self): return str(self)
 
-    @staticmethod
-    def explode(cumulative_length, *arrays, out_arrays=None):
-        out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
-        if out_arrays is None:
-            out_arrays = []
-            for array in arrays: 
-                out_arrays.append(np.empty(out_len, dtype=array.dtype))
-        for ii in range(len(arrays)):
-            if len(arrays[ii]) != len(cumulative_length):
-                raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}")
-            if cumulative_length[-1] != len(out_arrays[ii]):
-                raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}")
-            allocated_explode(cumulative_length, arrays[ii], out_arrays[ii])
-        return out_arrays
-
-
-@guvectorize(["void(int64[:], float32[:], float32[:])",
-              "void(int64[:], float64[:], float64[:])",
-              "void(int64[:], int32[:], int32[:])",
-              "void(int64[:], int64[:], int64[:])"],
-              "(n),(n),(m)", nopython=True, cache=True)
+
+@jit(nopython=True)
+def allocated_group_by(sorted_array_in, cumulative_length_out):
+    if len(cumulative_length_out) == 0 and len(sorted_array_in) > 0:
+        raise ValueError("cumulative_length_out too short ({len(cumulative_length_out)})")
+    cumulative_length_out.fill(0)
+    ii = 0
+    last_val = sorted_array_in[0]
+    for val in sorted_array_in:
+        if val != last_val:
+            ii += 1
+            cumulative_length_out[ii] = cumulative_length_out[ii-1]
+            if ii >= len(cumulative_length_out):
+                raise RuntimeError("cumulative_length_out too short ({len(cumulative_length_out)})")
+                return
+            last_val = val
+        cumulative_length_out[ii] += 1
+    ii += 1
+    return cumulative_length_out[:ii]
+
+@jit(nopython=True)
+def allocated_explode_cl(cumulative_length, array_out):
+    if cumulative_length[-1] != len(array_out):
+        raise ValueWarning(f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})")
+        return
+    start = 0
+    for ii in range(len(cumulative_length)):
+        for jj in range(cumulative_length[ii]):
+            array_out[start+jj] = ii
+        start = cumulative_length[ii]
+
+@jit(nopython=True)
 def allocated_explode(cumulative_length, array_in, array_out):
     if len(cumulative_length) != len(array_in) or cumulative_length[-1] != len(array_out):
-        for jj in range(len(array_out)):
-            array_out[jj] = np.NaN
+        raise ValueWarning(f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})")
         return
     ii = 0
     for jj in range(len(array_out)):
@@ -169,4 +179,17 @@ def allocated_explode(cumulative_length, array_in, array_out):
             ii += 1
         array_out[jj] = array_in[ii]
 
+def explode_arrays(cumulative_length, *arrays, out_arrays=None):
+    out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+    if out_arrays is None:
+        out_arrays = []
+        for array in arrays: 
+            out_arrays.append(np.empty(out_len, dtype=array.dtype))
+    for ii in range(len(arrays)):
+        if len(arrays[ii]) != len(cumulative_length):
+            raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}")
+        if cumulative_length[-1] != len(out_arrays[ii]):
+            raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}")
+        allocated_explode(cumulative_length, arrays[ii], out_arrays[ii])
+    return out_arrays
 

From 75bea6b53d6c38ec512c0df56f666d5755e837fa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 24 Jul 2022 21:58:25 +0000
Subject: [PATCH 08/11] style: pre-commit fixes

---
 src/pygama/lgdo/vectorofvectors.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index 9977c79fc..ab92f8afa 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -197,7 +197,7 @@ def explode_arrays(cumulative_length, *arrays, out_arrays=None):
     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
     if out_arrays is None:
         out_arrays = []
-        for array in arrays: 
+        for array in arrays:
             out_arrays.append(np.empty(out_len, dtype=array.dtype))
     for ii in range(len(arrays)):
         if len(arrays[ii]) != len(cumulative_length):
@@ -206,4 +206,3 @@ def explode_arrays(cumulative_length, *arrays, out_arrays=None):
             raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}")
         allocated_explode(cumulative_length, arrays[ii], out_arrays[ii])
     return out_arrays
-

From e12f70dd7110711ba1263d50904b85846c7a47f9 Mon Sep 17 00:00:00 2001
From: Jason Detwiler <jasondet@uw.edu>
Date: Tue, 26 Jul 2022 08:32:30 -0700
Subject: [PATCH 09/11] edits to build_cl and explodes after testing

---
 src/pygama/lgdo/__init__.py        |   2 +-
 src/pygama/lgdo/vectorofvectors.py | 162 ++++++++++++++++++++++++-----
 2 files changed, 139 insertions(+), 25 deletions(-)

diff --git a/src/pygama/lgdo/__init__.py b/src/pygama/lgdo/__init__.py
index 2798f8f04..019a9141c 100644
--- a/src/pygama/lgdo/__init__.py
+++ b/src/pygama/lgdo/__init__.py
@@ -36,5 +36,5 @@
 from pygama.lgdo.scalar import Scalar
 from pygama.lgdo.struct import Struct
 from pygama.lgdo.table import Table
-from pygama.lgdo.vectorofvectors import VectorOfVectors
+from pygama.lgdo.vectorofvectors import VectorOfVectors, build_cl, explode_cl, explode, explode_arrays
 from pygama.lgdo.waveform_table import WaveformTable
diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index ab92f8afa..45a16d808 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -8,7 +8,7 @@
 from typing import Any
 
 import numpy as np
-from numba import jit
+from numba import jit, njit
 
 from pygama.lgdo.array import Array
 from pygama.lgdo.lgdo_utils import get_element_type
@@ -152,11 +152,47 @@ def __str__(self) -> str:
     def __repr__(self) -> str:
         return str(self)
 
-@jit(nopython=True)
-def allocated_group_by(sorted_array_in, cumulative_length_out):
+
+def build_cl(sorted_array_in : Array, cumulative_length_out : np.ndarray = None) -> np.ndarray:
+    """ build a cumulative_length array from an array of sorted data
+
+    So for example if sorted_array_in contains [ 3, 3, 3, 4 ], would return 
+    [ 2, 3 ]
+
+    For a sorted_array_in of indices, this is the inverse of explode_cl() below,
+    in the sense that doing build_cl(explode_cl(cumulative_length)) would
+    recover the original cumulative_length.
+
+    Parameters
+    ----------
+    sorted_array_in
+        Array of data already sorted; each N matching contiguous entries will be
+        converted into a new row of cumulative_length_out
+    cumulative_length_out
+        This is an optional pre-allocated array for the output
+        cumulative_length. It will always have length <= sorted_array_in, so
+        giving them the same length is safe if there is not a better guess.
+
+    Returns
+    -------
+    cumulative_length_out
+        The output cumulative_length. If the user provides a
+        cumulative_length_out that is too long, this return value is sliced to
+        contain only the used portion of the allocated memory
+    """
+    if len(sorted_array_in) == 0: return None
+    sorted_array_in = np.asarray(sorted_array_in)
+    if cumulative_length_out is None: 
+        cumulative_length_out = np.zeros(len(sorted_array_in), dtype=np.uint64)
+    else:
+        cumulative_length_out.fill(0)
     if len(cumulative_length_out) == 0 and len(sorted_array_in) > 0:
         raise ValueError("cumulative_length_out too short ({len(cumulative_length_out)})")
-    cumulative_length_out.fill(0)
+    return nb_build_cl(sorted_array_in, cumulative_length_out)
+
+@njit
+def nb_build_cl(sorted_array_in : np.ndarray, cumulative_length_out : np.ndarray) -> np.ndarray:
+    """ numbified inner loop for build_cl """
     ii = 0
     last_val = sorted_array_in[0]
     for val in sorted_array_in:
@@ -164,45 +200,123 @@ def allocated_group_by(sorted_array_in, cumulative_length_out):
             ii += 1
             cumulative_length_out[ii] = cumulative_length_out[ii-1]
             if ii >= len(cumulative_length_out):
-                raise RuntimeError("cumulative_length_out too short ({len(cumulative_length_out)})")
-                return
+                raise RuntimeError("cumulative_length_out too short")
             last_val = val
         cumulative_length_out[ii] += 1
     ii += 1
     return cumulative_length_out[:ii]
 
-@jit(nopython=True)
-def allocated_explode_cl(cumulative_length, array_out):
-    if cumulative_length[-1] != len(array_out):
-        raise ValueWarning(f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})")
-        return
+
+def explode_cl(cumulative_length : Array, array_out : np.ndarray = None) -> np.ndarray:
+    """ explode a cumulative_length array
+
+    So for example if cumulative_length is [ 3, 4 ], would return [ 0, 0, 0, 1 ]
+
+    This is the inverse of build_cl() above, in the sense that doing
+    build_cl(explode_cl(cumulative_length)) would recover the original
+    cumulative_length.
+
+    Parameters
+    ----------
+    cumulative_length
+        the cumulative_length array to be exploded
+    array_out
+        an optional pre-allocated array to hold the exploded cumulative_length.
+        The length should be equal to cumulative_length[-1]
+
+    Returns
+    -------
+    array_out
+        the exploded cumulative_length array
+    """
+    cumulative_length = np.asarray(cumulative_length)
+    out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+    if array_out is None:
+        array_out = np.empty(int(out_len), dtype=np.uint64)
+    if len(array_out) != out_len:
+        raise ValueError(f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})")
+    return nb_explode_cl(cumulative_length, array_out)
+
+@njit
+def nb_explode_cl(cumulative_length : np.ndarray, array_out : np.ndarray) -> np.ndarray:
+    """ numbified inner loop for explode_cl"""
+    out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+    if len(array_out) != out_len:
+        raise ValueError("bad lengths")
     start = 0
     for ii in range(len(cumulative_length)):
-        for jj in range(cumulative_length[ii]):
-            array_out[start+jj] = ii
+        nn = int(cumulative_length[ii] - start)
+        for jj in range(nn):
+            array_out[int(start+jj)] = ii
         start = cumulative_length[ii]
+    return array_out
+
+
+
+def explode(cumulative_length : Array, array_in : Array, array_out : np.ndarray = None) -> np.ndarray :
+    """ explode a data array using a cumulative_length array
+
+    This is identical to explode_cl(), except array_in gets exploded instead
+    of cumulative_length. So for example, if array_in = [ 5, 7 ] and
+    cumulative_length = [ 3, 4 ], array_out would be [ 5, 5, 5, 7 ]
+
+    Parameters
+    ----------
+    cumulative_length
+        the cumulative_length array to use for exploding
+    array_in
+        the data to be exploded. Must have same length as cumulative_length
+    array_out
+        an optional pre-allocated array to hold the exploded data. The length
+        should be equal to cumulative_length[-1]
+    """
+    cumulative_length = np.asarray(cumulative_length)
+    array_in = np.asarray(array_in)
+    out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+    if array_out is None:
+        array_out = np.empty(out_len, dtype=array_in.dtype)
+    if len(cumulative_length) != len(array_in) or len(array_out) != out_len:
+        raise ValueError(f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})")
+    return nb_explode(cumulative_length, array_in, array_out)
 
-@jit(nopython=True)
-def allocated_explode(cumulative_length, array_in, array_out):
-    if len(cumulative_length) != len(array_in) or cumulative_length[-1] != len(array_out):
-        raise ValueWarning(f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})")
-        return
+@njit
+def nb_explode(cumulative_length : np.ndarray, array_in : np.ndarray, array_out : np.ndarray) -> np.ndarray :
+    """ numbified inner loop for explode"""
+    out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+    if len(cumulative_length) != len(array_in) or len(array_out) != out_len:
+        raise ValueError("bad lengths")
     ii = 0
     for jj in range(len(array_out)):
         while ii < len(cumulative_length) and jj >= cumulative_length[ii]:
             ii += 1
         array_out[jj] = array_in[ii]
+    return array_out
+
+
+def explode_arrays(cumulative_length : Array, arrays : list, out_arrays : list = None) -> list:
+    """ explode a set of arrays using a cumulative_length array
 
-def explode_arrays(cumulative_length, *arrays, out_arrays=None):
+    Parameters
+    ----------
+    cumulative_length
+        the cumulative_length array to use for exploding
+    arrays
+        the data arrays to be exploded. Each array must have same length as
+        cumulative_length
+    array_out
+        an optional list of pre-allocated arrays to hold the exploded data. The
+        length of the list should be equal to the number of "arrays", and each
+        entry in array_out should have length cumulative_length[-1]. If not
+        provided, output arrays are allocated for the user.
+    """
+    cumulative_length = np.asarray(cumulative_length)
+    for ii in range(len(arrays)):
+        arrays[ii] = np.asarray(arrays[ii])
     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
     if out_arrays is None:
         out_arrays = []
         for array in arrays:
             out_arrays.append(np.empty(out_len, dtype=array.dtype))
     for ii in range(len(arrays)):
-        if len(arrays[ii]) != len(cumulative_length):
-            raise ValueError(f"array {ii} has len {len(arrays[ii])} != cl length {len(cumulative_length)}")
-        if cumulative_length[-1] != len(out_arrays[ii]):
-            raise ValueError(f"out_array length {len(out_arrays[ii])} != cl[-1] = {cumulative_length[-1]}")
-        allocated_explode(cumulative_length, arrays[ii], out_arrays[ii])
+        explode(cumulative_length, arrays[ii], out_arrays[ii])
     return out_arrays

From 1da0f745084cf7467a78738a46a50068f7bddd74 Mon Sep 17 00:00:00 2001
From: Jason Detwiler <jasondet@uw.edu>
Date: Tue, 26 Jul 2022 08:32:58 -0700
Subject: [PATCH 10/11] add test_build_cl_and_explodes()

---
 tests/lgdo/test_vectorofvectors.py | 36 ++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tests/lgdo/test_vectorofvectors.py b/tests/lgdo/test_vectorofvectors.py
index 7a9a370f0..6aa9ae0b7 100644
--- a/tests/lgdo/test_vectorofvectors.py
+++ b/tests/lgdo/test_vectorofvectors.py
@@ -64,3 +64,39 @@ def test_iter(lgdo_vov):
     for v in lgdo_vov:
         assert (v == desired[c]).all()
         c += 1
+
+
+def test_build_cl_and_explodes():
+    cl = np.array([3, 4], dtype=np.uint64)
+    exp = np.array([0, 0, 0, 1], dtype=np.uint64)
+    array = np.array([5, 7], dtype=np.uint64)
+    array_exp = np.array([5, 5, 5, 7], dtype=np.uint64)
+    # build_cl
+    assert (lgdo.build_cl(exp, cl)       == cl).all()
+    assert (lgdo.build_cl(exp)           == cl).all()
+    assert (lgdo.build_cl([0, 0, 0, 1])  == cl).all()
+    assert (lgdo.build_cl(array_exp, cl) == cl).all()
+    assert (lgdo.build_cl(array_exp)     == cl).all()
+    assert (lgdo.build_cl([5, 5, 5, 7])  == cl).all()
+    # explode_cl
+    assert (lgdo.explode_cl(cl, exp) == exp).all()
+    assert (lgdo.explode_cl(cl)      == exp).all()
+    assert (lgdo.explode_cl([3,4])   == exp).all()
+    # inverse functionality
+    assert (lgdo.build_cl(lgdo.explode_cl(cl)) == cl).all()
+    assert (lgdo.explode_cl(lgdo.build_cl(array_exp)) == exp).all()
+    # explode
+    assert (lgdo.explode(cl, array, array_exp) == array_exp).all()
+    assert (lgdo.explode(cl, array)            == array_exp).all()
+    assert (lgdo.explode([3, 4], [5, 7])       == array_exp).all()
+    assert (lgdo.explode(cl, range(len(cl)))   == exp).all()
+    # explode_arrays
+    out_arrays = lgdo.explode_arrays(cl, [array, range(len(cl))])
+    assert len(out_arrays) == 2
+    assert (out_arrays[0] == array_exp).all()
+    assert (out_arrays[1] == exp).all()
+    out_arrays = lgdo.explode_arrays(cl, [array, range(len(cl))], out_arrays=out_arrays)
+    assert len(out_arrays) == 2
+    assert (out_arrays[0] == array_exp).all()
+    assert (out_arrays[1] == exp).all()
+

From 3b6cdf3f86d34b6ad34a197c9c0f796c98d0b9ee Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 26 Jul 2022 15:34:16 +0000
Subject: [PATCH 11/11] style: pre-commit fixes

---
 src/pygama/lgdo/__init__.py        | 8 +++++++-
 src/pygama/lgdo/vectorofvectors.py | 6 +++---
 tests/lgdo/test_vectorofvectors.py | 1 -
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/pygama/lgdo/__init__.py b/src/pygama/lgdo/__init__.py
index 019a9141c..4a4bec867 100644
--- a/src/pygama/lgdo/__init__.py
+++ b/src/pygama/lgdo/__init__.py
@@ -36,5 +36,11 @@
 from pygama.lgdo.scalar import Scalar
 from pygama.lgdo.struct import Struct
 from pygama.lgdo.table import Table
-from pygama.lgdo.vectorofvectors import VectorOfVectors, build_cl, explode_cl, explode, explode_arrays
+from pygama.lgdo.vectorofvectors import (
+    VectorOfVectors,
+    build_cl,
+    explode,
+    explode_arrays,
+    explode_cl,
+)
 from pygama.lgdo.waveform_table import WaveformTable
diff --git a/src/pygama/lgdo/vectorofvectors.py b/src/pygama/lgdo/vectorofvectors.py
index 45a16d808..791b631ef 100644
--- a/src/pygama/lgdo/vectorofvectors.py
+++ b/src/pygama/lgdo/vectorofvectors.py
@@ -156,7 +156,7 @@ def __repr__(self) -> str:
 def build_cl(sorted_array_in : Array, cumulative_length_out : np.ndarray = None) -> np.ndarray:
     """ build a cumulative_length array from an array of sorted data
 
-    So for example if sorted_array_in contains [ 3, 3, 3, 4 ], would return 
+    So for example if sorted_array_in contains [ 3, 3, 3, 4 ], would return
     [ 2, 3 ]
 
     For a sorted_array_in of indices, this is the inverse of explode_cl() below,
@@ -182,7 +182,7 @@ def build_cl(sorted_array_in : Array, cumulative_length_out : np.ndarray = None)
     """
     if len(sorted_array_in) == 0: return None
     sorted_array_in = np.asarray(sorted_array_in)
-    if cumulative_length_out is None: 
+    if cumulative_length_out is None:
         cumulative_length_out = np.zeros(len(sorted_array_in), dtype=np.uint64)
     else:
         cumulative_length_out.fill(0)
@@ -303,7 +303,7 @@ def explode_arrays(cumulative_length : Array, arrays : list, out_arrays : list =
     arrays
         the data arrays to be exploded. Each array must have same length as
         cumulative_length
-    array_out
+    out_arrays
         an optional list of pre-allocated arrays to hold the exploded data. The
         length of the list should be equal to the number of "arrays", and each
         entry in array_out should have length cumulative_length[-1]. If not
diff --git a/tests/lgdo/test_vectorofvectors.py b/tests/lgdo/test_vectorofvectors.py
index 6aa9ae0b7..0d18ecf10 100644
--- a/tests/lgdo/test_vectorofvectors.py
+++ b/tests/lgdo/test_vectorofvectors.py
@@ -99,4 +99,3 @@ def test_build_cl_and_explodes():
     assert len(out_arrays) == 2
     assert (out_arrays[0] == array_exp).all()
     assert (out_arrays[1] == exp).all()
-