From 6997fd8d00992fa47ce0a660c4df6e16cd2aa989 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 Dec 2018 16:24:11 +0000 Subject: [PATCH] deprecate categories and ordered parameters --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/arrays/categorical.py | 24 +++--- .../arrays/categorical/test_constructors.py | 81 ++++++------------- pandas/tests/indexes/test_category.py | 3 +- 4 files changed, 40 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4c0dbce9122e10..c742b4352beae9 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1123,6 +1123,7 @@ Deprecations - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) - The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, the ``header`` argument now defaults to ``True``. (:issue:`19715`) - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) +- :meth:`Categorical.from_codes` has deprecated the ``categories`` and ``ordered`` parameters. Pass a :class:`~pandas.api.types.CategoricalDtype` to the new ``dtype`` parameter instead. (:issue:`24398`) - :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) - :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. 
(:issue:`21950`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index aad97dbd49b678..4ae138ad364255 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -639,11 +639,13 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes, return cls(codes, dtype=dtype, fastpath=True) @classmethod + @deprecate_kwarg(old_arg_name='categories', new_arg_name=None) + @deprecate_kwarg(old_arg_name='ordered', new_arg_name=None) def from_codes(cls, codes, categories=None, ordered=None, dtype=None): """ - Make a Categorical type from codes and categories arrays. + Make a Categorical type from codes and CategoricalDtype. - This constructor is useful if you already have codes and categories and + This constructor is useful if you already have codes and the dtype and so do not need the (computation intensive) factorization step, which is usually done on the constructor. @@ -657,16 +659,17 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): categories or -1 for NaN categories : index-like, optional The categories for the categorical. Items need to be unique. + + .. deprecated:: 0.24.0 + Use ``dtype`` instead. ordered : bool, optional Whether or not this categorical is treated as an ordered categorical. If not given, the resulting categorical will be unordered. - .. versionchanged:: 0.24.0 - - The default value has been changed to ``None``. Previously - the default value was ``False``. - dtype : CategoricalDtype, optional + .. deprecated:: 0.24.0 + Use ``dtype`` instead. + dtype : CategoricalDtype An instance of ``CategoricalDtype`` to use for this categorical. .. 
versionadded:: 0.24.0 @@ -682,6 +685,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): if categories is not None or ordered is not None: raise ValueError("Cannot specify `categories` or `ordered` " "together with `dtype`.") + elif categories is None and dtype is None: + raise ValueError("Must specify `categories` or `dtype`.") else: dtype = CategoricalDtype(categories, ordered) @@ -1245,9 +1250,8 @@ def map(self, mapper): """ new_categories = self.categories.map(mapper) try: - return self.from_codes(self._codes.copy(), - categories=new_categories, - ordered=self.ordered) + new_dtype = CategoricalDtype(new_categories, ordered=self.ordered) + return self.from_codes(self._codes.copy(), dtype=new_dtype) except ValueError: return np.take(new_categories, self._codes) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 1806f7d7f4da7e..09bc3a8b7143ed 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -21,18 +21,13 @@ class TestCategoricalConstructors(object): def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" - exp_err = TypeError - # This should be a boolean. + # This should be a boolean or None. ordered = np.array([0, 1, 2]) - with pytest.raises(exp_err, match=exp_msg): + with pytest.raises(TypeError, match=exp_msg): Categorical([1, 2, 3], ordered=ordered) - with pytest.raises(exp_err, match=exp_msg): - Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'], - ordered=ordered) - def test_constructor_empty(self): # GH 17248 c = Categorical([]) @@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self): tm.assert_categorical_equal(result, expected) def test_from_codes(self): + dtype = CategoricalDtype(categories=[1, 2]) + + # no dtype or categories + msg = "Must specify `categories` or `dtype`." 
+ with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2]) # too few categories - dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be between " - with pytest.raises(ValueError, match=msg): - Categorical.from_codes([1, 2], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes([1, 2], dtype=dtype) # no int codes msg = "codes need to be array-like integers" - with pytest.raises(ValueError, match=msg): - Categorical.from_codes(["a"], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], dtype=dtype) - # no unique categories - with pytest.raises(ValueError, - match="Categorical categories must be unique"): - Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) - - # NaN categories included - with pytest.raises(ValueError, - match="Categorial categories cannot be null"): - Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) - # too negative dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = r"codes need to be between -1 and len\(categories\)-1" - with pytest.raises(ValueError, match=msg): - Categorical.from_codes([-2, 1, 2], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes([-2, 1, 2], dtype=dtype) exp = Categorical(["a", "b", "c"], ordered=False) - res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) - tm.assert_categorical_equal(exp, res) - res = Categorical.from_codes([0, 1, 2], dtype=dtype) tm.assert_categorical_equal(exp, res) codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) dtype = CategoricalDtype(categories=["train", "test"]) - Categorical.from_codes(codes, categories=dtype.categories) Categorical.from_codes(codes, dtype=dtype) - def test_from_codes_with_categorical_categories(self): - # GH17884 - expected = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - - result = Categorical.from_codes( - [0, 1], categories=Categorical(['a', 'b', 'c'])) - 
tm.assert_categorical_equal(result, expected) - - result = Categorical.from_codes( - [0, 1], categories=CategoricalIndex(['a', 'b', 'c'])) - tm.assert_categorical_equal(result, expected) - - # non-unique Categorical still raises - with pytest.raises(ValueError, - match="Categorical categories must be unique"): - Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a'])) - def test_from_codes_with_nan_code(self): # GH21767 codes = [1, 2, np.nan] dtype = CategoricalDtype(categories=['a', 'b', 'c']) - with pytest.raises(ValueError, - match="codes need to be array-like integers"): - Categorical.from_codes(codes, categories=dtype.categories) with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, dtype=dtype) @@ -500,28 +460,32 @@ def test_from_codes_with_float(self): codes = [1.0, 2.0, 0] # integer, but in float dtype dtype = CategoricalDtype(categories=['a', 'b', 'c']) - with tm.assert_produces_warning(FutureWarning): - cat = Categorical.from_codes(codes, dtype.categories) - tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1')) - - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): cat = Categorical.from_codes(codes, dtype=dtype) tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1')) codes = [1.1, 2.0, 0] # non-integer - with pytest.raises(ValueError, - match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype.categories) with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, dtype=dtype) + def test_from_codes_deprecated(self): + with tm.assert_produces_warning(FutureWarning): + Categorical.from_codes([0, 1], categories=['a', 'b']) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + Categorical.from_codes([0, 1], categories=['a', 'b'], ordered=True) + + with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): + Categorical.from_codes([0, 1], categories=['a', 'b'], ordered=False) + @pytest.mark.parametrize('dtype', [None, 'category']) def test_from_inferred_categories(self, dtype): cats = ['a', 'b'] codes = np.array([0, 0, 1, 1], dtype='i8') result = Categorical._from_inferred_categories(cats, codes, dtype) - expected = Categorical.from_codes(codes, cats) + expected = Categorical.from_codes(codes, + dtype=CategoricalDtype(cats)) tm.assert_categorical_equal(result, expected) @pytest.mark.parametrize('dtype', [None, 'category']) @@ -529,7 +493,8 @@ def test_from_inferred_categories_sorts(self, dtype): cats = ['b', 'a'] codes = np.array([0, 1, 1, 1], dtype='i8') result = Categorical._from_inferred_categories(cats, codes, dtype) - expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b']) + expected = Categorical.from_codes([1, 0, 0, 0], + dtype=CategoricalDtype(['a', 'b'])) tm.assert_categorical_equal(result, expected) def test_from_inferred_categories_dtype(self): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6b0aa67ab25431..d61f6c7d863805 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -433,8 +433,9 @@ def test_astype(self): right=[2, 4], closed='right') + dtype = CategoricalDtype(categories=ii, ordered=True) ci = CategoricalIndex(Categorical.from_codes( - [0, 1, -1], categories=ii, ordered=True)) + [0, 1, -1], dtype=dtype)) result = ci.astype('interval') expected = ii.take([0, 1, -1])