Skip to content

Commit

Permalink
Support multiplication of pd.ArrowDtype(pa.string()) and integral value where integral value is a series (pandas-dev#56538)
Browse files Browse the repository at this point in the history

* allow repeat count to be a series

* fix validation

* gh reference

* fix conditional logic

* Revert "fix conditional logic"

This reverts commit 15f1990.

* remove condition

* inline

---------

Co-authored-by: Rohan Jain <[email protected]>
  • Loading branch information
rohanjain101 and Rohan Jain authored Dec 19, 2023
1 parent 38c2877 commit 98e1d2f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
29 changes: 19 additions & 10 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,22 +693,31 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
other = self._box_pa(other)

if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type):
if op in [operator.add, roperator.radd, operator.mul, roperator.rmul]:
if op in [operator.add, roperator.radd]:
sep = pa.scalar("", type=pa_type)
if op is operator.add:
result = pc.binary_join_element_wise(self._pa_array, other, sep)
elif op is roperator.radd:
result = pc.binary_join_element_wise(other, self._pa_array, sep)
else:
if not (
isinstance(other, pa.Scalar) and pa.types.is_integer(other.type)
):
raise TypeError("Can only string multiply by an integer.")
result = pc.binary_join_element_wise(
*([self._pa_array] * other.as_py()), sep
)
return type(self)(result)

elif op in [operator.mul, roperator.rmul]:
binary = self._pa_array
integral = other
if not pa.types.is_integer(integral.type):
raise TypeError("Can only string multiply by an integer.")
pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
result = pc.binary_repeat(binary, pa_integral)
return type(self)(result)
elif (
pa.types.is_string(other.type) or pa.types.is_binary(other.type)
) and op in [operator.mul, roperator.rmul]:
binary = other
integral = self._pa_array
if not pa.types.is_integer(integral.type):
raise TypeError("Can only string multiply by an integer.")
pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
result = pc.binary_repeat(binary, pa_integral)
return type(self)(result)
if (
isinstance(other, pa.Scalar)
and pc.is_null(other).as_py()
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,26 @@ def test_arrowdtype_construct_from_string_type_only_one_pyarrow():
pd.Series(range(3), dtype=invalid)


def test_arrow_string_multiplication():
    """Check Arrow string Series * Arrow integer Series in both operand orders.

    The repeat counts are applied element-wise; the expected values below
    encode that a negative count yields an empty string.
    """
    # GH 56537
    string_dtype = ArrowDtype(pa.string())
    strings = pd.Series(["abc", "defg"], dtype=string_dtype)
    counts = pd.Series([2, -2], dtype="int64[pyarrow]")

    product = strings * counts
    wanted = pd.Series(["abcabc", ""], dtype=string_dtype)
    tm.assert_series_equal(product, wanted)

    # The reflected form (integers on the left) must match the forward form.
    reflected_product = counts * strings
    tm.assert_series_equal(product, reflected_product)


def test_arrow_string_multiplication_scalar_repeat():
    """Check Arrow string Series * Python int scalar in both operand orders."""
    string_dtype = ArrowDtype(pa.string())
    strings = pd.Series(["abc", "defg"], dtype=string_dtype)

    # Series on the left, scalar repeat count on the right.
    product = strings * 2
    wanted = pd.Series(["abcabc", "defgdefg"], dtype=string_dtype)
    tm.assert_series_equal(product, wanted)

    # Scalar on the left exercises the reflected operator.
    reflected_product = 2 * strings
    tm.assert_series_equal(reflected_product, wanted)


@pytest.mark.parametrize(
"interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
)
Expand Down

0 comments on commit 98e1d2f

Please sign in to comment.