From 28edd06498987972953b5c8e1c98b7f55d24b8d2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 1 May 2018 01:45:19 -0500 Subject: [PATCH] PERF: Faster Series.__getattribute__ (#20834) --- asv_bench/benchmarks/series_methods.py | 13 +++++++++++++ doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 13 +++++++++++++ pandas/tests/indexes/datetimelike.py | 5 +++++ pandas/tests/indexes/test_base.py | 5 +++++ pandas/tests/indexes/test_category.py | 5 +++++ pandas/tests/indexes/test_multi.py | 5 +++++ pandas/tests/indexes/test_numeric.py | 5 +++++ pandas/tests/indexes/test_range.py | 5 +++++ 10 files changed, 58 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 478aba278029c..3f6522c3403d9 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -121,3 +121,16 @@ def setup(self): def time_dir_strings(self): dir(self.s) + + +class SeriesGetattr(object): + # https://github.com/pandas-dev/pandas/issues/19764 + goal_time = 0.2 + + def setup(self): + self.s = Series(1, + index=date_range("2012-01-01", freq='s', + periods=int(1e6))) + + def time_series_datetimeindex_repr(self): + getattr(self.s, 'a', None) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 062a54431bc34..604b68b650201 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -961,6 +961,7 @@ Performance Improvements - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`) - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`) - Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. 
This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) - Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) .. _whatsnew_0230.docs: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6d55f92167d3b..af19acbb416ee 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4375,7 +4375,7 @@ def __getattr__(self, name): name in self._accessors): return object.__getattribute__(self, name) else: - if name in self._info_axis: + if self._info_axis._can_hold_identifiers_and_holds_name(name): return self[name] return object.__getattribute__(self, name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2ceec1592d49b..f208687a0cf1b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2083,6 +2083,19 @@ def __getitem__(self, key): else: return result + def _can_hold_identifiers_and_holds_name(self, name): + """ + Faster check for ``name in self`` when we know `name` is a Python + identifier (e.g. in NDFrame.__getattr__, which hits this to support + . key lookup). For indexes that can't hold identifiers (everything + but object & categorical) we just return False. 
+ + https://github.com/pandas-dev/pandas/issues/19764 + """ + if self.is_object() or self.is_categorical(): + return name in self + return False + def append(self, other): """ Append a collection of Index options together diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 7d01a2a70145d..e32e18ea0ec4a 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -8,6 +8,11 @@ class DatetimeLike(Base): + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + def test_shift_identity(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index afb6653b1e694..0a686ebdf5c3e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -66,6 +66,11 @@ def generate_index_types(self, skip_index_keys=[]): if key not in skip_index_keys: yield key, index + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + def test_new_axis(self): new_index = self.dateIndex[None, :] assert new_index.ndim == 2 diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e9fddfde90348..6a1a1a5bdba4f 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -33,6 +33,11 @@ def create_index(self, categories=None, ordered=False): return CategoricalIndex( list('aabbca'), categories=categories, ordered=ordered) + def test_can_hold_identifiers(self): + idx = self.create_index(categories=list('abcd')) + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + def test_construction(self): ci = self.create_index(categories=list('abcd')) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index da11ac645ae07..37f70090c179f 100644 
--- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -48,6 +48,11 @@ def setup_method(self, method): def create_index(self): return self.index + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + def test_boolean_context_compat2(self): # boolean context compat diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index bafb6ae2e45f4..49322d9b7abd6 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -64,6 +64,11 @@ def test_index_rdiv_timedelta(self, scalar_td, index): class Numeric(Base): + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + def test_numeric_compat(self): pass # override Base method diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 8990834ebe91a..38f4b341116b8 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -44,6 +44,11 @@ def check_binop(self, ops, scalars, idxs): expected = op(Int64Index(idx), scalar) tm.assert_index_equal(result, expected) + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + def test_binops(self): ops = [operator.add, operator.sub, operator.mul, operator.floordiv, operator.truediv]