From e07f63930e455e1698e41413fc309587b7d3074a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 2 Oct 2023 15:31:26 +0100 Subject: [PATCH 1/3] BUG: eq not implemented for categorical and arrow backed strings --- doc/source/whatsnew/v2.1.2.rst | 2 +- pandas/core/arrays/arrow/array.py | 5 ++++- pandas/tests/indexes/categorical/test_equals.py | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index 1a25b848e0f84..7bee10d687def 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -24,7 +24,7 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) -- +- Fixed bug in :meth:`Categorical.equals` if other has arrow backed string dtype (:issue:`TODO`) .. --------------------------------------------------------------------------- .. _whatsnew_212.other: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 4b79d0dbb683e..4937581b0abdd 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -32,6 +32,7 @@ from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.common import ( + CategoricalDtype, is_array_like, is_bool_dtype, is_integer, @@ -627,7 +628,9 @@ def __setstate__(self, state) -> None: def _cmp_method(self, other, op): pc_func = ARROW_CMP_FUNCS[op.__name__] - if isinstance(other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray)): + if isinstance( + other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray) + ) or isinstance(getattr(other, "dtype", None), CategoricalDtype): result = pc_func(self._pa_array, self._box_pa(other)) elif is_scalar(other): try: diff --git a/pandas/tests/indexes/categorical/test_equals.py b/pandas/tests/indexes/categorical/test_equals.py index 1ed8f3a903439..c693e1181b9d0 100644 --- a/pandas/tests/indexes/categorical/test_equals.py +++ b/pandas/tests/indexes/categorical/test_equals.py @@ -88,3 +88,9 @@ def test_equals_multiindex(self): ci = mi.to_flat_index().astype("category") assert not ci.equals(mi) + + def test_equals_string_dtype(self, any_string_dtype): + # GH# + idx = CategoricalIndex(list("abc"), name="B") + other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype) + assert idx.equals(other) From 0ff376bf8c7871c13c2cfcc3d605bd2bfda92594 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:42:20 +0200 Subject: [PATCH 2/3] Update test_equals.py --- pandas/tests/indexes/categorical/test_equals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/categorical/test_equals.py b/pandas/tests/indexes/categorical/test_equals.py index c693e1181b9d0..a8353f301a3c3 100644 --- a/pandas/tests/indexes/categorical/test_equals.py +++ b/pandas/tests/indexes/categorical/test_equals.py @@ -90,7 +90,7 @@ def test_equals_multiindex(self): assert not ci.equals(mi) def test_equals_string_dtype(self, any_string_dtype): - # GH# + # GH#55364 idx = CategoricalIndex(list("abc"), name="B") other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype) assert idx.equals(other) From 9bdb9b9d7b48e377c08228fefedae2cac078fa0b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:43:36 +0200 Subject: [PATCH 3/3] Update v2.1.2.rst --- doc/source/whatsnew/v2.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index 7bee10d687def..1fb132bafe39f 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -24,7 +24,7 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) -- Fixed bug in :meth:`Categorical.equals` if other has arrow backed string dtype (:issue:`TODO`) +- Fixed bug in :meth:`Categorical.equals` if other has arrow backed string dtype (:issue:`55364`) .. --------------------------------------------------------------------------- .. _whatsnew_212.other: