From 4366126a3b5c5b7524b6f35351eff730c2a49c04 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 22 Nov 2017 22:33:19 +0000 Subject: [PATCH] BUG: Copy categorical codes if empty (fixes #18051) --- doc/source/whatsnew/v0.21.1.txt | 1 + pandas/core/categorical.py | 2 +- pandas/tests/series/test_analytics.py | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 5829481cdb731..682776de2e62c 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -135,6 +135,7 @@ Categorical - Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) - Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) - ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) +- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`) Other ^^^^^ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 03bf09352862b..deaec20586005 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2276,7 +2276,7 @@ def _recode_for_categories(codes, old_categories, new_categories): if len(old_categories) == 0: # All null anyway, so just retain the nulls - return codes + return codes.copy() indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), new_categories) new_codes = take_1d(indexer, codes.copy(), fill_value=-1) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index ff788fb2347b8..cfc319da1598d 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -848,6 +848,12 @@ def test_value_counts_nunique(self): result = series.nunique() assert result == 11 + # GH 18051 + s = pd.Series(pd.Categorical([])) + assert s.nunique() == 0 + s = pd.Series(pd.Categorical([np.nan])) + assert s.nunique() == 0 + def test_unique(self): # 714 also, dtype=float @@ -873,6 +879,14 @@ def test_unique(self): expected = np.array([1, 2, 3, None], dtype=object) tm.assert_numpy_array_equal(result, expected) + # GH 18051 + s = pd.Series(pd.Categorical([])) + tm.assert_categorical_equal(s.unique(), pd.Categorical([]), + check_dtype=False) + s = pd.Series(pd.Categorical([np.nan])) + tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]), + check_dtype=False) + @pytest.mark.parametrize( "tc1, tc2", [