From 1fe62285da9dc2af341c1e91bb430b6645516148 Mon Sep 17 00:00:00 2001
From: ioanaif <ioana.ifrim@gmail.com>
Date: Tue, 8 Mar 2022 11:26:05 +0200
Subject: [PATCH 1/2] ak.run_lengths and testing

---
 src/awkward/_v2/behaviors/string.py           |  58 +--
 .../operations/structure/ak_run_lengths.py    | 415 +++++++++---------
 tests/v2/test_0733-run_lengths.py             |  63 +++
 3 files changed, 294 insertions(+), 242 deletions(-)
 create mode 100644 tests/v2/test_0733-run_lengths.py

diff --git a/src/awkward/_v2/behaviors/string.py b/src/awkward/_v2/behaviors/string.py
index 4486af1242..d4519a58be 100644
--- a/src/awkward/_v2/behaviors/string.py
+++ b/src/awkward/_v2/behaviors/string.py
@@ -100,44 +100,48 @@ def __str__(self):
 #             yield x.__bytes__()
 
 
-# class StringBehavior(ak._v2.highlevel.Array):
-#     __name__ = "Array"
-
-#     def __iter__(self):
-#         for x in super(StringBehavior, self).__iter__():
-#             yield x.__str__()
+class StringBehavior(Array):
+    __name__ = "Array"
 
+    def __iter__(self):
+        for x in super().__iter__():
+            yield x.__str__()
 
-# def _string_equal(one, two):
-#     nplike = ak.nplike.of(one, two)
-#     behavior = ak._v2._util.behaviorof(one, two)
 
-#     one, two = ak.without_parameters(one).layout, ak.without_parameters(two).layout
+def _string_equal(one, two):
+    nplike = ak.nplike.of(one, two)
+    behavior = ak._v2._util.behavior_of(one, two)
 
-#     # first condition: string lengths must be the same
-#     counts1 = nplike.asarray(one.count(axis=-1))
-#     counts2 = nplike.asarray(two.count(axis=-1))
+    one, two = (
+        ak._v2.operations.structure.without_parameters(one).layout,
+        ak._v2.operations.structure.without_parameters(two).layout,
+    )
 
-#     out = counts1 == counts2
+    # first condition: two strings can only be equal if their lengths match
+    counts1 = nplike.asarray(one.count(axis=-1))
+    counts2 = nplike.asarray(two.count(axis=-1))
 
-#     # only compare characters in strings that are possibly equal (same length)
-#     possible = nplike.logical_and(out, counts1)
-#     possible_counts = counts1[possible]
+    out = counts1 == counts2
 
-#     if len(possible_counts) > 0:
-#         onepossible = one[possible]
-#         twopossible = two[possible]
+    # character comparison is only needed for same-length, non-empty pairs
+    possible = nplike.logical_and(out, counts1)
+    possible_counts = counts1[possible]
 
-#         reduced = ak.all(ak.Array(onepossible) == ak.Array(twopossible), axis=-1).layout
+    if len(possible_counts) > 0:
+        onepossible = one[possible]
+        twopossible = two[possible]
 
-#         # update same-length strings with a verdict about their characters
-#         out[possible] = reduced
+        reduced = ak._v2.operations.reducers.all(
+            ak._v2.Array(onepossible) == ak._v2.Array(twopossible), axis=-1
+        ).layout
+        # replace the provisional True of each compared pair with its character verdict
+        out[possible] = reduced.data
 
-#     return ak._v2._util.wrap(ak._v2.contents.NumpyArray(out), behavior)
+    return ak._v2._util.wrap(ak._v2.contents.NumpyArray(out), behavior)
 
 
-# def _string_notequal(one, two):
-#     return ~_string_equal(one, two)
+def _string_notequal(one, two):
+    return ~_string_equal(one, two)
 
 
 # def _string_broadcast(layout, offsets):
@@ -250,7 +254,7 @@ def register(behavior):
     # behavior[ak.nplike.numpy.equal, "bytestring", "bytestring"] = _string_equal
     # behavior[ak.nplike.numpy.equal, "string", "string"] = _string_equal
     # behavior[ak.nplike.numpy.not_equal, "bytestring", "bytestring"] = _string_notequal
-    # behavior[ak.nplike.numpy.not_equal, "string", "string"] = _string_notequal
+    behavior[ak.nplike.numpy.not_equal, "string", "string"] = _string_notequal
 
     # behavior["__broadcast__", "bytestring"] = _string_broadcast
     # behavior["__broadcast__", "string"] = _string_broadcast
diff --git a/src/awkward/_v2/operations/structure/ak_run_lengths.py b/src/awkward/_v2/operations/structure/ak_run_lengths.py
index 8095f01d06..e7c4d023a8 100644
--- a/src/awkward/_v2/operations/structure/ak_run_lengths.py
+++ b/src/awkward/_v2/operations/structure/ak_run_lengths.py
@@ -6,218 +6,203 @@
 
 
 def run_lengths(array, highlevel=True, behavior=None):
-    raise ak._v2._util.error(NotImplementedError)
-
-
-#     """
-#     Args:
-#         array: Data containing runs of numbers to count.
-#         highlevel (bool): If True, return an #ak.Array; otherwise, return
-#             a low-level #ak.layout.Content subclass.
-#         behavior (None or dict): Custom #ak.behavior for the output array, if
-#             high-level.
-
-#     Computes the lengths of sequences of identical values at the deepest level
-#     of nesting, returning an array with the same structure but with `int64` type.
-
-#     For example,
-
-#         >>> array = ak.Array([1.1, 1.1, 1.1, 2.2, 3.3, 3.3, 4.4, 4.4, 5.5])
-#         >>> ak.run_lengths(array)
-#         <Array [3, 1, 2, 2, 1] type='5 * int64'>
-
-#     There are 3 instances of 1.1, followed by 1 instance of 2.2, 2 instances of 3.3,
-#     2 instances of 4.4, and 1 instance of 5.5.
-
-#     The order and uniqueness of the input data doesn't matter,
-
-#         >>> array = ak.Array([1.1, 1.1, 1.1, 5.5, 4.4, 4.4, 1.1, 1.1, 5.5])
-#         >>> ak.run_lengths(array)
-#         <Array [3, 1, 2, 2, 1] type='5 * int64'>
-
-#     just the difference between each value and its neighbors.
-
-#     The data can be nested, but runs don't cross list boundaries.
-
-#         >>> array = ak.Array([[1.1, 1.1, 1.1, 2.2, 3.3], [3.3, 4.4], [4.4, 5.5]])
-#         >>> ak.run_lengths(array)
-#         <Array [[3, 1, 1], [1, 1], [1, 1]] type='3 * var * int64'>
-
-#     This function recognizes strings as distinguishable values.
-
-#         >>> array = ak.Array([["one", "one"], ["one", "two", "two"], ["three", "two", "two"]])
-#         >>> ak.run_lengths(array)
-#         <Array [[2], [1, 2], [1, 2]] type='3 * var * int64'>
-
-#     Note that this can be combined with #ak.argsort and #ak.unflatten to compute
-#     a "group by" operation:
-
-#         >>> array = ak.Array([{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 1, "y": 1.1},
-#         ...                   {"x": 3, "y": 3.3}, {"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}])
-#         >>> sorted = array[ak.argsort(array.x)]
-#         >>> sorted.x
-#         <Array [1, 1, 1, 2, 2, 3] type='6 * int64'>
-#         >>> ak.run_lengths(sorted.x)
-#         <Array [3, 2, 1] type='3 * int64'>
-#         >>> ak.unflatten(sorted, ak.run_lengths(sorted.x)).tolist()
-#         [[{'x': 1, 'y': 1.1}, {'x': 1, 'y': 1.1}, {'x': 1, 'y': 1.1}],
-#          [{'x': 2, 'y': 2.2}, {'x': 2, 'y': 2.2}],
-#          [{'x': 3, 'y': 3.3}]]
-
-#     Unlike a database "group by," this operation can be applied in bulk to many sublists
-#     (though the run lengths need to be fully flattened to be used as `counts` for
-#     #ak.unflatten, and you need to specify `axis=-1` as the depth).
-
-#         >>> array = ak.Array([[{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 1, "y": 1.1}],
-#         ...                   [{"x": 3, "y": 3.3}, {"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}]])
-#         >>> sorted = array[ak.argsort(array.x)]
-#         >>> sorted.x
-#         <Array [[1, 1, 2], [1, 2, 3]] type='2 * var * int64'>
-#         >>> ak.run_lengths(sorted.x)
-#         <Array [[2, 1], [1, 1, 1]] type='2 * var * int64'>
-#         >>> counts = ak.flatten(ak.run_lengths(sorted.x), axis=None)
-#         >>> ak.unflatten(sorted, counts, axis=-1).tolist()
-#         [[[{'x': 1, 'y': 1.1}, {'x': 1, 'y': 1.1}],
-#           [{'x': 2, 'y': 2.2}]],
-#          [[{'x': 1, 'y': 1.1}],
-#           [{'x': 2, 'y': 2.2}],
-#           [{'x': 3, 'y': 3.3}]]]
-
-#     See also #ak.num, #ak.argsort, #ak.unflatten.
-#     """
-#     nplike = ak.nplike.of(array)
-
-#     def lengths_of(data, offsets):
-#         if len(data) == 0:
-#             return nplike.empty(0, np.int64), offsets
-#         else:
-#             diffs = data[1:] != data[:-1]
-#             if isinstance(diffs, ak._v2.highlevel.Array):
-#                 diffs = nplike.asarray(diffs)
-#             if offsets is not None:
-#                 diffs[offsets[1:-1] - 1] = True
-#             positions = nplike.nonzero(diffs)[0]
-#             full_positions = nplike.empty(len(positions) + 2, np.int64)
-#             full_positions[0] = 0
-#             full_positions[-1] = len(data)
-#             full_positions[1:-1] = positions + 1
-#             nextcontent = full_positions[1:] - full_positions[:-1]
-#             if offsets is None:
-#                 nextoffsets = None
-#             else:
-#                 nextoffsets = nplike.searchsorted(full_positions, offsets, side="left")
-#             return nextcontent, nextoffsets
-
-#     def getfunction(layout):
-#         if layout.branch_depth == (False, 1):
-#             if isinstance(layout, ak._v2._util.indexedtypes):
-#                 layout = layout.project()
-
-#             if (
-#                 layout.parameter("__array__") == "string"
-#                 or layout.parameter("__array__") == "bytestring"
-#             ):
-#                 nextcontent, _ = lengths_of(ak._v2.highlevel.Array(layout), None)
-#                 return lambda: ak._v2.contents.NumpyArray(nextcontent)
-
-#             if not isinstance(layout, (ak._v2.contents.NumpyArray, ak._v2.contents.EmptyArray)):
-#                 raise ak._v2._util.error(NotImplementedError(
-#                     "run_lengths on "
-#                     + type(layout).__name__
-#
-#                 ))
-
-#             nextcontent, _ = lengths_of(nplike.asarray(layout), None)
-#             return lambda: ak._v2.contents.NumpyArray(nextcontent)
-
-#         elif layout.branch_depth == (False, 2):
-#             if isinstance(layout, ak._v2._util.indexedtypes):
-#                 layout = layout.project()
-
-#             if not isinstance(layout, ak._v2._util.listtypes):
-#                 raise ak._v2._util.error(NotImplementedError(
-#                     "run_lengths on "
-#                     + type(layout).__name__
-#
-#                 ))
-
-#             if (
-#                 layout.content.parameter("__array__") == "string"
-#                 or layout.content.parameter("__array__") == "bytestring"
-#             ):
-#                 listoffsetarray = layout.toListOffsetArray64(False)
-#                 offsets = nplike.asarray(listoffsetarray.offsets)
-#                 content = listoffsetarray.content[offsets[0] : offsets[-1]]
-
-#                 if isinstance(content, ak._v2._util.indexedtypes):
-#                     content = content.project()
-
-#                 nextcontent, nextoffsets = lengths_of(
-#                     ak._v2.highlevel.Array(content), offsets - offsets[0]
-#                 )
-#                 return lambda: ak._v2.contents.ListOffsetArray64(
-#                     ak._v2.index.Index64(nextoffsets), ak._v2.contents.NumpyArray(nextcontent)
-#                 )
-
-#             listoffsetarray = layout.toListOffsetArray64(False)
-#             offsets = nplike.asarray(listoffsetarray.offsets)
-#             content = listoffsetarray.content[offsets[0] : offsets[-1]]
-
-#             if isinstance(content, ak._v2._util.indexedtypes):
-#                 content = content.project()
-
-#             if not isinstance(content, (ak._v2.contents.NumpyArray, ak._v2.contents.EmptyArray)):
-#                 raise ak._v2._util.error(NotImplementedError(
-#                     "run_lengths on "
-#                     + type(layout).__name__
-#                     + " with content "
-#                     + type(content).__name__
-#
-#                 ))
-
-#             nextcontent, nextoffsets = lengths_of(
-#                 nplike.asarray(content), offsets - offsets[0]
-#             )
-#             return lambda: ak._v2.contents.ListOffsetArray64(
-#                 ak._v2.index.Index64(nextoffsets), ak._v2.contents.NumpyArray(nextcontent)
-#             )
-
-#         else:
-#             return None
-
-#     layout = ak._v2.operations.convert.to_layout(
-#         array, allow_record=False, allow_other=False
-#     )
-
-#     if isinstance(layout, ak.partition.PartitionedArray):   # NO PARTITIONED ARRAY
-#         if len(layout.partitions) != 0 and layout.partitions[0].branch_depth == (
-#             False,
-#             1,
-#         ):
-#             out = ak._v2._util.recursively_apply(
-#                 layout.toContent(),
-#                 getfunction,
-#                 pass_depth=False,
-#                 pass_user=False,
-#             )
-#         else:
-#             outparts = []
-#             for part in layout.partitions:
-#                 outparts.append(
-#                     ak._v2._util.recursively_apply(
-#                         part,
-#                         getfunction,
-#                         pass_depth=False,
-#                         pass_user=False,
-#                     )
-#                 )
-#             out = ak.partition.IrregularlyPartitionedArray(outparts)   # NO PARTITIONED ARRAY
-#     else:
-#         out = ak._v2._util.recursively_apply(
-#             layout,
-#             getfunction,
-#             pass_depth=False,
-#             pass_user=False,
-#         )
-
-#     return ak._v2._util.maybe_wrap_like(out, array, behavior, highlevel)
+
+    """
+    Args:
+        array: Data containing runs of values (numbers or strings) to count.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.layout.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Computes the lengths of sequences of identical values at the deepest level
+    of nesting, returning an array with the same structure but with `int64` type.
+
+    For example,
+
+        >>> array = ak.Array([1.1, 1.1, 1.1, 2.2, 3.3, 3.3, 4.4, 4.4, 5.5])
+        >>> ak.run_lengths(array)
+        <Array [3, 1, 2, 2, 1] type='5 * int64'>
+
+    There are 3 instances of 1.1, followed by 1 instance of 2.2, 2 instances of 3.3,
+    2 instances of 4.4, and 1 instance of 5.5.
+
+    The order and uniqueness of the input data doesn't matter,
+
+        >>> array = ak.Array([1.1, 1.1, 1.1, 5.5, 4.4, 4.4, 1.1, 1.1, 5.5])
+        >>> ak.run_lengths(array)
+        <Array [3, 1, 2, 2, 1] type='5 * int64'>
+
+    just the difference between each value and its neighbors.
+
+    The data can be nested, but runs don't cross list boundaries.
+
+        >>> array = ak.Array([[1.1, 1.1, 1.1, 2.2, 3.3], [3.3, 4.4], [4.4, 5.5]])
+        >>> ak.run_lengths(array)
+        <Array [[3, 1, 1], [1, 1], [1, 1]] type='3 * var * int64'>
+
+    This function recognizes strings as distinguishable values.
+
+        >>> array = ak.Array([["one", "one"], ["one", "two", "two"], ["three", "two", "two"]])
+        >>> ak.run_lengths(array)
+        <Array [[2], [1, 2], [1, 2]] type='3 * var * int64'>
+
+    Note that this can be combined with #ak.argsort and #ak.unflatten to compute
+    a "group by" operation:
+
+        >>> array = ak.Array([{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 1, "y": 1.1},
+        ...                   {"x": 3, "y": 3.3}, {"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}])
+        >>> sorted = array[ak.argsort(array.x)]
+        >>> sorted.x
+        <Array [1, 1, 1, 2, 2, 3] type='6 * int64'>
+        >>> ak.run_lengths(sorted.x)
+        <Array [3, 2, 1] type='3 * int64'>
+        >>> ak.unflatten(sorted, ak.run_lengths(sorted.x)).tolist()
+        [[{'x': 1, 'y': 1.1}, {'x': 1, 'y': 1.1}, {'x': 1, 'y': 1.1}],
+         [{'x': 2, 'y': 2.2}, {'x': 2, 'y': 2.2}],
+         [{'x': 3, 'y': 3.3}]]
+
+    Unlike a database "group by," this operation can be applied in bulk to many sublists
+    (though the run lengths need to be fully flattened to be used as `counts` for
+    #ak.unflatten, and you need to specify `axis=-1` as the depth).
+
+        >>> array = ak.Array([[{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 1, "y": 1.1}],
+        ...                   [{"x": 3, "y": 3.3}, {"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}]])
+        >>> sorted = array[ak.argsort(array.x)]
+        >>> sorted.x
+        <Array [[1, 1, 2], [1, 2, 3]] type='2 * var * int64'>
+        >>> ak.run_lengths(sorted.x)
+        <Array [[2, 1], [1, 1, 1]] type='2 * var * int64'>
+        >>> counts = ak.flatten(ak.run_lengths(sorted.x), axis=None)
+        >>> ak.unflatten(sorted, counts, axis=-1).tolist()
+        [[[{'x': 1, 'y': 1.1}, {'x': 1, 'y': 1.1}],
+          [{'x': 2, 'y': 2.2}]],
+         [[{'x': 1, 'y': 1.1}],
+          [{'x': 2, 'y': 2.2}],
+          [{'x': 3, 'y': 3.3}]]]
+
+    See also #ak.num, #ak.argsort, #ak.unflatten.
+    """
+    with ak._v2._util.OperationErrorContext(
+        "ak._v2.run_lengths",
+        dict(
+            array=array,
+            highlevel=highlevel,
+            behavior=behavior,
+        ),
+    ):
+        return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    """
+    Replaces the deepest level of `array` by the lengths of its runs of equal
+    neighboring values; runs never extend across list boundaries. The result
+    is wrapped according to `behavior` and `highlevel`.
+    """
+    nplike = ak.nplike.of(array)
+
+    def lengths_of(data, offsets):
+        # Returns (run lengths of data, offsets that regroup those runs into the
+        # original lists); offsets is None when data is not divided into lists.
+        if len(data) == 0:
+            return nplike.empty(0, np.int64), offsets
+
+        # diffs[i] is True where data[i + 1] starts a new run
+        diffs = data[1:] != data[:-1]
+        if isinstance(diffs, ak._v2.highlevel.Array):
+            diffs = nplike.asarray(diffs)
+        if offsets is not None:
+            # Break runs at list boundaries, but only at boundaries strictly
+            # inside data: the offset of a leading or trailing empty list is 0
+            # or len(data), which would index diffs at -1 (wrapping around to
+            # its last item) or one past its end.
+            interior = nplike.logical_and(offsets > 0, offsets < len(data))
+            diffs[offsets[interior] - 1] = True
+
+        positions = nplike.nonzero(diffs)[0]
+        full_positions = nplike.empty(len(positions) + 2, np.int64)
+        full_positions[0] = 0
+        full_positions[-1] = len(data)
+        full_positions[1:-1] = positions + 1
+
+        nextcontent = full_positions[1:] - full_positions[:-1]
+        if offsets is None:
+            return nextcontent, None
+        # every offset is a run boundary, so this finds the run it begins at
+        return nextcontent, nplike.searchsorted(full_positions, offsets, side="left")
+
+    def action(layout, **kwargs):
+        # Rewrites the one or two deepest levels of nesting; None is returned
+        # for every other node (presumably so recursively_apply keeps descending).
+        def is_stringlike(content):
+            return content.parameter("__array__") in ("string", "bytestring")
+
+        if layout.branch_depth == (False, 1):
+            if layout.is_IndexedType:
+                layout = layout.project()
+
+            if is_stringlike(layout):
+                # whole strings are compared through the high-level Array's `!=`
+                nextcontent, _ = lengths_of(ak._v2.highlevel.Array(layout), None)
+                return ak._v2.contents.NumpyArray(nextcontent)
+
+            if not isinstance(
+                layout, (ak._v2.contents.NumpyArray, ak._v2.contents.EmptyArray)
+            ):
+                raise ak._v2._util.error(
+                    NotImplementedError("run_lengths on " + type(layout).__name__)
+                )
+
+            nextcontent, _ = lengths_of(nplike.asarray(layout), None)
+            return ak._v2.contents.NumpyArray(nextcontent)
+
+        elif layout.branch_depth == (False, 2):
+            if layout.is_IndexedType:
+                layout = layout.project()
+
+            if not layout.is_ListType:
+                raise ak._v2._util.error(
+                    NotImplementedError("run_lengths on " + type(layout).__name__)
+                )
+
+            # content is trimmed to the range the offsets cover (rebased to 0 below)
+            listoffsetarray = layout.toListOffsetArray64(False)
+            offsets = nplike.asarray(listoffsetarray.offsets)
+            content = listoffsetarray.content[offsets[0] : offsets[-1]]
+
+            if content.is_IndexedType:
+                content = content.project()
+
+            # strings are compared as whole values, numbers as a flat buffer
+            if is_stringlike(layout.content):
+                data = ak._v2.highlevel.Array(content)
+            elif isinstance(
+                content, (ak._v2.contents.NumpyArray, ak._v2.contents.EmptyArray)
+            ):
+                data = nplike.asarray(content)
+            else:
+                raise ak._v2._util.error(
+                    NotImplementedError(
+                        "run_lengths on "
+                        + type(layout).__name__
+                        + " with content "
+                        + type(content).__name__
+                    )
+                )
+
+            nextcontent, nextoffsets = lengths_of(data, offsets - offsets[0])
+            return ak._v2.contents.ListOffsetArray(
+                ak._v2.index.Index64(nextoffsets),
+                ak._v2.contents.NumpyArray(nextcontent),
+            )
+        else:
+            return None
+
+    layout = ak._v2.operations.convert.to_layout(
+        array, allow_record=False, allow_other=False
+    )
+
+    out = layout.recursively_apply(action)
+
+    return ak._v2._util.wrap(out, behavior, highlevel)
diff --git a/tests/v2/test_0733-run_lengths.py b/tests/v2/test_0733-run_lengths.py
new file mode 100644
index 0000000000..eb3f25adb9
--- /dev/null
+++ b/tests/v2/test_0733-run_lengths.py
@@ -0,0 +1,63 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+
+import pytest  # noqa: F401
+import numpy as np  # noqa: F401
+import awkward as ak  # noqa: F401
+
+
+def test():
+    array = ak._v2.Array([3, 3, 3, 5, 5, 9, 9, 9, 9, 1, 3, 3])
+    assert ak._v2.operations.structure.run_lengths(array).tolist() == [3, 2, 4, 1, 2]
+
+    array = ak._v2.Array([[3, 3, 3, 5], [5], [], [9, 9], [9, 9], [1, 3, 3]])
+    assert ak._v2.operations.structure.run_lengths(array).tolist() == [[3, 1], [1], [], [2], [2], [1, 2]]
+
+@pytest.mark.skip(reason="ak.unflatten unimplemented")
+def test_groupby():
+    array = ak._v2.Array(
+        [
+            {"x": 1, "y": 1.1},
+            {"x": 2, "y": 2.2},
+            {"x": 1, "y": 1.1},
+            {"x": 3, "y": 3.3},
+            {"x": 1, "y": 1.1},
+            {"x": 2, "y": 2.2},
+        ]
+    )
+    sorted = array[ak._v2.operations.structure.argsort(array.x)]
+    assert sorted.x.tolist() == [1, 1, 1, 2, 2, 3]
+    assert ak._v2.operations.structure.run_lengths(sorted.x).tolist() == [3, 2, 1]
+    assert ak.unflatten(sorted, ak._v2.operations.structure.run_lengths(sorted.x)).tolist() == [
+        [{"x": 1, "y": 1.1}, {"x": 1, "y": 1.1}, {"x": 1, "y": 1.1}],
+        [{"x": 2, "y": 2.2}, {"x": 2, "y": 2.2}],
+        [{"x": 3, "y": 3.3}],
+    ]
+
+    array = ak._v2.Array(
+        [
+            [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 1, "y": 1.1}],
+            [{"x": 3, "y": 3.3}, {"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}],
+        ]
+    )
+    sorted = array[ak._v2.operations.structure.argsort(array.x)]
+    assert sorted.x.tolist() == [[1, 1, 2], [1, 2, 3]]
+    assert ak._v2.operations.structure.run_lengths(sorted.x).tolist() == [[2, 1], [1, 1, 1]]
+    counts = ak._v2.operations.structure.flatten(ak._v2.operations.structure.run_lengths(sorted.x), axis=None)
+    assert ak.unflatten(sorted, counts, axis=-1).tolist() == [
+        [[{"x": 1, "y": 1.1}, {"x": 1, "y": 1.1}], [{"x": 2, "y": 2.2}]],
+        [[{"x": 1, "y": 1.1}], [{"x": 2, "y": 2.2}], [{"x": 3, "y": 3.3}]],
+    ]
+
+
+def test_onstrings1():
+    data = ak.Array(["one", "one", "one", "two", "two", "three", "two", "two"])
+    assert ak.run_lengths(data).tolist() == [3, 2, 1, 2]
+
+    data = ak._v2.Array(["one", "one", "one", "two", "two", "three", "two", "two"])
+    assert ak._v2.operations.structure.run_lengths(data).tolist() == [3, 2, 1, 2]
+
+
+def test_onstrings2():
+    data = ak._v2.Array([["one", "one"], ["one", "two", "two"], ["three", "two", "two"]])
+    assert ak._v2.operations.structure.run_lengths(data).tolist() == [[2], [1, 2], [1, 2]]

From 23a3ef4f9978a66f3d0dd7e8c94e7dafd462f5c3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 8 Mar 2022 09:29:05 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/v2/test_0733-run_lengths.py | 33 +++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/tests/v2/test_0733-run_lengths.py b/tests/v2/test_0733-run_lengths.py
index eb3f25adb9..2c5883a3e4 100644
--- a/tests/v2/test_0733-run_lengths.py
+++ b/tests/v2/test_0733-run_lengths.py
@@ -11,7 +11,15 @@ def test():
     assert ak._v2.operations.structure.run_lengths(array).tolist() == [3, 2, 4, 1, 2]
 
     array = ak._v2.Array([[3, 3, 3, 5], [5], [], [9, 9], [9, 9], [1, 3, 3]])
-    assert ak._v2.operations.structure.run_lengths(array).tolist() == [[3, 1], [1], [], [2], [2], [1, 2]]
+    assert ak._v2.operations.structure.run_lengths(array).tolist() == [
+        [3, 1],
+        [1],
+        [],
+        [2],
+        [2],
+        [1, 2],
+    ]
+
 
 @pytest.mark.skip(reason="ak.unflatten unimplemented")
 def test_groupby():
@@ -28,7 +36,9 @@ def test_groupby():
     sorted = array[ak._v2.operations.structure.argsort(array.x)]
     assert sorted.x.tolist() == [1, 1, 1, 2, 2, 3]
     assert ak._v2.operations.structure.run_lengths(sorted.x).tolist() == [3, 2, 1]
-    assert ak.unflatten(sorted, ak._v2.operations.structure.run_lengths(sorted.x)).tolist() == [
+    assert ak.unflatten(
+        sorted, ak._v2.operations.structure.run_lengths(sorted.x)
+    ).tolist() == [
         [{"x": 1, "y": 1.1}, {"x": 1, "y": 1.1}, {"x": 1, "y": 1.1}],
         [{"x": 2, "y": 2.2}, {"x": 2, "y": 2.2}],
         [{"x": 3, "y": 3.3}],
@@ -42,8 +52,13 @@ def test_groupby():
     )
     sorted = array[ak._v2.operations.structure.argsort(array.x)]
     assert sorted.x.tolist() == [[1, 1, 2], [1, 2, 3]]
-    assert ak._v2.operations.structure.run_lengths(sorted.x).tolist() == [[2, 1], [1, 1, 1]]
-    counts = ak._v2.operations.structure.flatten(ak._v2.operations.structure.run_lengths(sorted.x), axis=None)
+    assert ak._v2.operations.structure.run_lengths(sorted.x).tolist() == [
+        [2, 1],
+        [1, 1, 1],
+    ]
+    counts = ak._v2.operations.structure.flatten(
+        ak._v2.operations.structure.run_lengths(sorted.x), axis=None
+    )
     assert ak.unflatten(sorted, counts, axis=-1).tolist() == [
         [[{"x": 1, "y": 1.1}, {"x": 1, "y": 1.1}], [{"x": 2, "y": 2.2}]],
         [[{"x": 1, "y": 1.1}], [{"x": 2, "y": 2.2}], [{"x": 3, "y": 3.3}]],
@@ -59,5 +74,11 @@ def test_onstrings1():
 
 
 def test_onstrings2():
-    data = ak._v2.Array([["one", "one"], ["one", "two", "two"], ["three", "two", "two"]])
-    assert ak._v2.operations.structure.run_lengths(data).tolist() == [[2], [1, 2], [1, 2]]
+    data = ak._v2.Array(
+        [["one", "one"], ["one", "two", "two"], ["three", "two", "two"]]
+    )
+    assert ak._v2.operations.structure.run_lengths(data).tolist() == [
+        [2],
+        [1, 2],
+        [1, 2],
+    ]