From 579b8268474c5c89133ae7106a323dd94e1e33fa Mon Sep 17 00:00:00 2001 From: Paras Gupta Date: Fri, 13 Oct 2023 21:57:36 +0530 Subject: [PATCH] BUG: categorical dtype equality for level in different type (#55486) * BUG: categorical dtype equality for level in different type * BUG: categorical dtype equality for level in different type - Comments#1 --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index eec82ae26afcc..ef1a34c4c27c6 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -285,6 +285,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`) +- Bug in :meth:`CategoricalDtype.__eq__` returning false for unordered categorical data with mixed types (:issue:`55468`) - Datetimelike diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 196c95c3673e9..9f85f82df666f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -456,7 +456,7 @@ def __eq__(self, other: object) -> bool: # With object-dtype we need a comparison that identifies # e.g. int(2) as distinct from float(2) - return hash(self) == hash(other) + return set(left) == set(right) def __repr__(self) -> str_type: if self.categories is None: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 1f9c371c50ad4..27994708d2bdb 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -917,6 +917,24 @@ def test_equal_but_different(self): assert c1 is not c2 assert c1 != c2 + def test_equal_but_different_mixed_dtypes(self): + c1 = CategoricalDtype([1, 2, "3"]) + c2 = CategoricalDtype(["3", 1, 2]) + assert c1 is not c2 + assert c1 == c2 + + def test_equal_empty_ordered(self): + c1 = CategoricalDtype([], ordered=True) + c2 = CategoricalDtype([], ordered=True) + assert c1 is not c2 + assert c1 == c2 + + def test_equal_empty_unordered(self): + c1 = CategoricalDtype([]) + c2 = CategoricalDtype([]) + assert c1 is not c2 + assert c1 == c2 + @pytest.mark.parametrize("v1, v2", [([1, 2, 3], [1, 2, 3]), ([1, 2, 3], [3, 2, 1])]) def test_order_hashes_different(self, v1, v2): c1 = CategoricalDtype(v1, ordered=False)