From 98e1d2f8cec1593bb8bc6a8dbdd164372286ea09 Mon Sep 17 00:00:00 2001
From: rohanjain101 <38412262+rohanjain101@users.noreply.github.com>
Date: Tue, 19 Dec 2023 18:40:45 -0500
Subject: [PATCH] Support multiplication of pd.ArrowDtype(pa.string()) and integral value where integral value is a series (#56538)

* allow repeat count to be a series

* fix validation

* gh reference

* fix conditional logic

* Revert "fix conditional logic"

This reverts commit 15f19901744ddcc72562efa8da6508d81abbf2f5.

* remove condition

* inline

---------

Co-authored-by: Rohan Jain
---
 pandas/core/arrays/arrow/array.py    | 29 ++++++++++++++++++----------
 pandas/tests/extension/test_arrow.py | 20 +++++++++++++++++++
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 84d6e2fb7ca53..633efe43fce1a 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -693,22 +693,31 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
         other = self._box_pa(other)
 
         if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type):
-            if op in [operator.add, roperator.radd, operator.mul, roperator.rmul]:
+            if op in [operator.add, roperator.radd]:
                 sep = pa.scalar("", type=pa_type)
                 if op is operator.add:
                     result = pc.binary_join_element_wise(self._pa_array, other, sep)
                 elif op is roperator.radd:
                     result = pc.binary_join_element_wise(other, self._pa_array, sep)
-                else:
-                    if not (
-                        isinstance(other, pa.Scalar) and pa.types.is_integer(other.type)
-                    ):
-                        raise TypeError("Can only string multiply by an integer.")
-                    result = pc.binary_join_element_wise(
-                        *([self._pa_array] * other.as_py()), sep
-                    )
                 return type(self)(result)
-
+            elif op in [operator.mul, roperator.rmul]:
+                binary = self._pa_array
+                integral = other
+                if not pa.types.is_integer(integral.type):
+                    raise TypeError("Can only string multiply by an integer.")
+                pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
+                result = pc.binary_repeat(binary, pa_integral)
+                return type(self)(result)
+        elif (
+            pa.types.is_string(other.type) or pa.types.is_binary(other.type)
+        ) and op in [operator.mul, roperator.rmul]:
+            binary = other
+            integral = self._pa_array
+            if not pa.types.is_integer(integral.type):
+                raise TypeError("Can only string multiply by an integer.")
+            pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
+            result = pc.binary_repeat(binary, pa_integral)
+            return type(self)(result)
         if (
             isinstance(other, pa.Scalar)
             and pc.is_null(other).as_py()
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 4835cb11db042..674a5da216011 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1334,6 +1334,26 @@ def test_arrowdtype_construct_from_string_type_only_one_pyarrow():
         pd.Series(range(3), dtype=invalid)
 
 
+def test_arrow_string_multiplication():
+    # GH 56537
+    binary = pd.Series(["abc", "defg"], dtype=ArrowDtype(pa.string()))
+    repeat = pd.Series([2, -2], dtype="int64[pyarrow]")
+    result = binary * repeat
+    expected = pd.Series(["abcabc", ""], dtype=ArrowDtype(pa.string()))
+    tm.assert_series_equal(result, expected)
+    reflected_result = repeat * binary
+    tm.assert_series_equal(result, reflected_result)
+
+
+def test_arrow_string_multiplication_scalar_repeat():
+    binary = pd.Series(["abc", "defg"], dtype=ArrowDtype(pa.string()))
+    result = binary * 2
+    expected = pd.Series(["abcabc", "defgdefg"], dtype=ArrowDtype(pa.string()))
+    tm.assert_series_equal(result, expected)
+    reflected_result = 2 * binary
+    tm.assert_series_equal(reflected_result, expected)
+
+
 @pytest.mark.parametrize(
     "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
 )