From d7c22601e7046bdcdc3b59eeb82be6ead2c96460 Mon Sep 17 00:00:00 2001
From: a-reich <73507369+a-reich@users.noreply.github.com>
Date: Tue, 14 May 2024 07:47:55 -0400
Subject: [PATCH] GH-41464: [Python] Fix StructArray.sort() for by=None (#41495)

### Rationale for this change

Closes issue https://github.com/apache/arrow/issues/41464.

Fix `StructArray.sort` method's `by` param to work in the case of `by=None` which was documented to mean sort by all fields (the default), but would raise an exception.

### What changes are included in this PR?

* Add a unit test with by=None in `test_struct_array_sort` that fails on main
* Fix the sort method

### Are these changes tested?

yes

### Are there any user-facing changes?

yes
* GitHub Issue: #41464

Authored-by: a-reich
Signed-off-by: Joris Van den Bossche
---
 python/pyarrow/array.pxi           | 7 +++----
 python/pyarrow/tests/test_array.py | 8 ++++++++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 946c82b258241..406830ad4dd69 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -3920,12 +3920,11 @@ cdef class StructArray(Array):
         result : StructArray
         """
         if by is not None:
-            tosort = self._flattened_field(by)
+            tosort, sort_keys = self._flattened_field(by), [("", order)]
         else:
-            tosort = self
+            tosort, sort_keys = self, [(field.name, order) for field in self.type]
         indices = _pc().sort_indices(
-            tosort,
-            options=_pc().SortOptions(sort_keys=[("", order)], **kwargs)
+            tosort, options=_pc().SortOptions(sort_keys=sort_keys, **kwargs)
         )
         return self.take(indices)
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index f1f946ecc7dfb..b89e0ace157af 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -3536,6 +3536,14 @@ def test_struct_array_sort():
         {"a": 5, "b": "foo"},
     ]
 
+    sorted_arr = arr.sort()
+    assert sorted_arr.to_pylist() == [
+        {"a": 5, "b": "foo"},
+        {"a": 7, "b": "bar"},
+        {"a": 7, "b": "car"},
+        {"a": 35, "b": "foobar"},
+    ]
+
     arr_with_nulls = pa.StructArray.from_arrays([
         pa.array([5, 7, 7, 35], type=pa.int64()),
         pa.array(["foo", "car", "bar", "foobar"])