From 62422e1cd9f32e296ae9bcbf712acddbb96b77dd Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 22 Nov 2017 22:33:19 +0000 Subject: [PATCH] BUG: Copy categorical codes if empty (fixes #18051) --- doc/source/whatsnew/v0.21.1.txt | 1 + pandas/core/categorical.py | 2 +- pandas/tests/series/test_analytics.py | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 0ab536f2898c75..7a5ccc6fa78532 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -130,6 +130,7 @@ Categorical - Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) - ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) +- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`) Other ^^^^^ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d0851e3ab4f96c..3e3ff3493a10bc 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2255,7 +2255,7 @@ def _recode_for_categories(codes, old_categories, new_categories): if len(old_categories) == 0: # All null anyway, so just retain the nulls - return codes + return codes.copy() indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), new_categories) new_codes = take_1d(indexer, codes.copy(), fill_value=-1) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index ff788fb2347b8a..cfc319da1598d0 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -848,6 +848,12 @@ def test_value_counts_nunique(self): result = series.nunique() assert result == 11 + # GH 18051 + s = pd.Series(pd.Categorical([])) + assert s.nunique() == 0 + s = pd.Series(pd.Categorical([np.nan])) + assert s.nunique() == 0 + def test_unique(self): # 714 also, dtype=float @@ -873,6 +879,14 @@ def test_unique(self): expected = np.array([1, 2, 3, None], dtype=object) tm.assert_numpy_array_equal(result, expected) + # GH 18051 + s = pd.Series(pd.Categorical([])) + tm.assert_categorical_equal(s.unique(), pd.Categorical([]), + check_dtype=False) + s = pd.Series(pd.Categorical([np.nan])) + tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]), + check_dtype=False) + @pytest.mark.parametrize( "tc1, tc2", [