From 61b14b2aebb5c9334e96af5d3083fcf575793a96 Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Thu, 4 Aug 2016 06:36:18 -0400
Subject: [PATCH] COMPAT: Categorical Subclassing

xref #8640

Author: sinhrks

Closes #13827 from sinhrks/categorical_subclass and squashes the following commits:

13c456c [sinhrks] COMPAT: Categorical Subclassing
---
 pandas/core/categorical.py       | 62 +++++++++++++++++---------------
 pandas/tests/test_categorical.py | 30 ++++++++++++++++
 pandas/util/testing.py           |  9 ++++-
 3 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 39e140e962821..6ea0a5e96672d 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -328,11 +328,16 @@ def __init__(self, values, categories=None, ordered=False,
         self._categories = categories
         self._codes = _coerce_indexer_dtype(codes, categories)
 
+    @property
+    def _constructor(self):
+        return Categorical
+
     def copy(self):
         """ Copy constructor. """
-        return Categorical(values=self._codes.copy(),
-                           categories=self.categories, ordered=self.ordered,
-                           fastpath=True)
+        return self._constructor(values=self._codes.copy(),
+                                 categories=self.categories,
+                                 ordered=self.ordered,
+                                 fastpath=True)
 
     def astype(self, dtype, copy=True):
         """
@@ -414,7 +419,7 @@ def from_array(cls, data, **kwargs):
             Can be an Index or array-like. The categories are assumed to be
             the unique values of `data`.
         """
-        return Categorical(data, **kwargs)
+        return cls(data, **kwargs)
 
     @classmethod
     def from_codes(cls, codes, categories, ordered=False, name=None):
@@ -458,8 +463,8 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
             raise ValueError("codes need to be between -1 and "
                              "len(categories)-1")
 
-        return Categorical(codes, categories=categories, ordered=ordered,
-                           fastpath=True)
+        return cls(codes, categories=categories, ordered=ordered,
+                   fastpath=True)
 
     _codes = None
 
@@ -916,9 +921,9 @@ def map(self, mapper):
         """
         new_categories = self.categories.map(mapper)
         try:
-            return Categorical.from_codes(self._codes.copy(),
-                                          categories=new_categories,
-                                          ordered=self.ordered)
+            return self.from_codes(self._codes.copy(),
+                                   categories=new_categories,
+                                   ordered=self.ordered)
         except ValueError:
             return np.take(new_categories, self._codes)
 
@@ -968,8 +973,8 @@ def shift(self, periods):
         else:
             codes[periods:] = -1
 
-        return Categorical.from_codes(codes, categories=self.categories,
-                                      ordered=self.ordered)
+        return self.from_codes(codes, categories=self.categories,
+                               ordered=self.ordered)
 
     def __array__(self, dtype=None):
         """
@@ -1159,8 +1164,8 @@ def value_counts(self, dropna=True):
             count = bincount(np.where(mask, code, ncat))
             ix = np.append(ix, -1)
 
-        ix = Categorical(ix, categories=cat, ordered=obj.ordered,
-                         fastpath=True)
+        ix = self._constructor(ix, categories=cat, ordered=obj.ordered,
+                               fastpath=True)
 
         return Series(count, index=CategoricalIndex(ix), dtype='int64')
 
@@ -1313,8 +1318,8 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
             self._codes = codes
             return
         else:
-            return Categorical(values=codes, categories=self.categories,
-                               ordered=self.ordered, fastpath=True)
+            return self._constructor(values=codes, categories=self.categories,
+                                     ordered=self.ordered, fastpath=True)
 
     def order(self, inplace=False, ascending=True, na_position='last'):
         """
@@ -1441,8 +1446,8 @@ def fillna(self, value=None, method=None, limit=None):
                 values = values.copy()
                 values[mask] = self.categories.get_loc(value)
 
-        return Categorical(values, categories=self.categories,
-                           ordered=self.ordered, fastpath=True)
+        return self._constructor(values, categories=self.categories,
+                                 ordered=self.ordered, fastpath=True)
 
     def take_nd(self, indexer, allow_fill=True, fill_value=None):
         """ Take the codes by the indexer, fill with the fill_value.
@@ -1455,8 +1460,8 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
         assert isnull(fill_value)
 
         codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
-        result = Categorical(codes, categories=self.categories,
-                             ordered=self.ordered, fastpath=True)
+        result = self._constructor(codes, categories=self.categories,
+                                   ordered=self.ordered, fastpath=True)
         return result
 
     take = take_nd
@@ -1476,8 +1481,8 @@ def _slice(self, slicer):
             slicer = slicer[1]
 
         _codes = self._codes[slicer]
-        return Categorical(values=_codes, categories=self.categories,
-                           ordered=self.ordered, fastpath=True)
+        return self._constructor(values=_codes, categories=self.categories,
+                                 ordered=self.ordered, fastpath=True)
 
     def __len__(self):
         """The length of this Categorical."""
@@ -1588,10 +1593,9 @@ def __getitem__(self, key):
             else:
                 return self.categories[i]
         else:
-            return Categorical(values=self._codes[key],
-                               categories=self.categories,
-                               ordered=self.ordered,
-                               fastpath=True)
+            return self._constructor(values=self._codes[key],
+                                     categories=self.categories,
+                                     ordered=self.ordered, fastpath=True)
 
     def __setitem__(self, key, value):
         """ Item assignment.
@@ -1742,8 +1746,8 @@ def mode(self):
         import pandas.hashtable as htable
         good = self._codes != -1
         values = sorted(htable.mode_int64(_ensure_int64(self._codes[good])))
-        result = Categorical(values=values, categories=self.categories,
-                             ordered=self.ordered, fastpath=True)
+        result = self._constructor(values=values, categories=self.categories,
+                                   ordered=self.ordered, fastpath=True)
         return result
 
     def unique(self):
@@ -1837,8 +1841,8 @@ def repeat(self, repeats, *args, **kwargs):
         """
         nv.validate_repeat(args, kwargs)
         codes = self._codes.repeat(repeats)
-        return Categorical(values=codes, categories=self.categories,
-                           ordered=self.ordered, fastpath=True)
+        return self._constructor(values=codes, categories=self.categories,
+                                 ordered=self.ordered, fastpath=True)
 
 # The Series.cat accessor
 
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 42636c6330fba..0e37f5bf17405 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -4415,6 +4415,36 @@ def test_concat_categorical(self):
         tm.assert_frame_equal(df_expected, df_concat)
 
 
+class TestCategoricalSubclassing(tm.TestCase):
+
+    _multiprocess_can_split_ = True
+
+    def test_constructor(self):
+        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
+        self.assertIsInstance(sc, tm.SubclassedCategorical)
+        tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
+
+    def test_from_array(self):
+        sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
+        self.assertIsInstance(sc, tm.SubclassedCategorical)
+        exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
+        tm.assert_categorical_equal(sc, exp)
+
+    def test_map(self):
+        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
+        res = sc.map(lambda x: x.upper())
+        self.assertIsInstance(res, tm.SubclassedCategorical)
+        exp = Categorical(['A', 'B', 'C'])
+        tm.assert_categorical_equal(res, exp)
+
+    def test_map(self):
+        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
+        res = sc.map(lambda x: x.upper())
+        self.assertIsInstance(res, tm.SubclassedCategorical)
+        exp = Categorical(['A', 'B', 'C'])
+        tm.assert_categorical_equal(res, exp)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index e4a84ea4ae296..c6573934bff57 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -43,7 +43,7 @@
 
 from pandas.computation import expressions as expr
 
-from pandas import (bdate_range, CategoricalIndex, DatetimeIndex,
+from pandas import (bdate_range, CategoricalIndex, Categorical, DatetimeIndex,
                     TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex,
                     Series, DataFrame, Panel, Panel4D)
 from pandas.util.decorators import deprecate
@@ -2670,6 +2670,13 @@ def _constructor_sliced(self):
         return SubclassedSparseSeries
 
 
+class SubclassedCategorical(Categorical):
+
+    @property
+    def _constructor(self):
+        return SubclassedCategorical
+
+
 @contextmanager
 def patch(ob, attr, value):
     """Temporarily patch an attribute of an object.