From a3d5f1f3c9b629b96143b20846d4e2c2167a2c7a Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 11 Mar 2020 23:20:13 +0000 Subject: [PATCH] PERF: _shallow_copy copies cache --- doc/source/whatsnew/v1.1.0.rst | 6 +++--- pandas/core/indexes/category.py | 12 ++++-------- pandas/core/indexes/datetimelike.py | 5 ++++- pandas/core/indexes/datetimes.py | 1 + pandas/core/indexes/interval.py | 12 ++++++++---- pandas/core/indexes/period.py | 1 + pandas/core/indexes/timedeltas.py | 1 + 7 files changed, 22 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c473500c205d81..2fa6f9adf57be0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -188,9 +188,9 @@ Performance improvements - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) -- The internal :meth:`Index._shallow_copy` now copies cached attributes over to the new index, - avoiding creating these again on the new index. This can speed up many operations - that depend on creating copies of existing indexes (:issue:`28584`) +- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, + avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of + existing indexes (:issue:`28584`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5997843f7ac6d6..6388f2007cb127 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -233,6 +233,7 @@ def _simple_new(cls, values: Categorical, name: Label = None): result._data = values result.name = name + result._cache = {} result._reset_identity() result._no_setting_name = False @@ -242,14 +243,9 @@ def _simple_new(cls, values: Categorical, name: Label = None): @Appender(Index._shallow_copy.__doc__) def _shallow_copy(self, values=None, name: Label = no_default): - name = self.name if name is no_default else name - - if values is None: - values = self.values - - cat = Categorical(values, dtype=self.dtype) - - return type(self)._simple_new(cat, name=name) + if values is not None: + values = Categorical(values, dtype=self.dtype) + return super()._shallow_copy(values=values, name=name) def _is_dtype_compat(self, other) -> bool: """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 894e1d95a17bce..2355fd00e7d9b8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -618,6 +618,7 @@ def _set_freq(self, freq): def _shallow_copy(self, values=None, name: Label = lib.no_default): name = self.name if name is lib.no_default else name + cache = self._cache if values is None else {} if values is None: values = self._data @@ -636,7 +637,9 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): del attributes["freq"] attributes["name"] = name - return type(self)._simple_new(values, **attributes) + result = self._simple_new(values, **attributes) + result._cache = cache + return result # -------------------------------------------------------------------- # Set Operation Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 
185ad8e4c365a3..c8035a9de432b5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -268,6 +268,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): result = object.__new__(cls) result._data = dtarr result.name = name + result._cache = {} result._no_setting_name = False # For groupby perf. See note in indexes/base about _index_data result._index_data = dtarr._data diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index efdaf5a331f5f6..d3b2b5f9d8ac01 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -243,6 +243,7 @@ def _simple_new(cls, array: IntervalArray, name: Label = None): result = IntervalMixin.__new__(cls) result._data = array result.name = name + result._cache = {} result._no_setting_name = False result._reset_identity() return result @@ -332,12 +333,15 @@ def from_tuples( # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = lib.no_default): + name = self.name if name is lib.no_default else name + cache = self._cache if values is None else {} if values is None: values = self._data - attributes = self._get_attributes_dict() - attributes.update(kwargs) - return self._simple_new(values, **attributes) + + result = self._simple_new(values, name=name) + result._cache = cache + return result @cache_readonly def _isnan(self): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9ab80c0b6145c8..8ae792d3f63b5a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -233,6 +233,7 @@ def _simple_new(cls, values: PeriodArray, name: Label = None): # For groupby perf. 
See note in indexes/base about _index_data result._index_data = values._data result.name = name + result._cache = {} result._reset_identity() return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5e4a8e83bd95b0..a6d9d4dfc330b4 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -180,6 +180,7 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): result = object.__new__(cls) result._data = values result._name = name + result._cache = {} # For groupby perf. See note in indexes/base about _index_data result._index_data = values._data