PERF: Faster Series.__getattribute__ (#20834)

pandas-dev · May 1, 2018 · 28edd06 · 28edd06
1 parent f799916
commit 28edd06
Show file tree

Hide file tree

Showing 10 changed files with 58 additions and 1 deletion.
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -121,3 +121,16 @@ def setup(self):
 
     def time_dir_strings(self):
         dir(self.s)
+
+
+class SeriesGetattr(object):
+    # https://github.com/pandas-dev/pandas/issues/19764
+    goal_time = 0.2
+
+    def setup(self):
+        self.s = Series(1,
+                        index=date_range("2012-01-01", freq='s',
+                                         periods=int(1e6)))
+
+    def time_series_datetimeindex_repr(self):
+        getattr(self.s, 'a', None)
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -961,6 +961,7 @@ Performance Improvements
 - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)
 - Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
+- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`)
 - Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`)
 
 .. _whatsnew_0230.docs:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4375,7 +4375,7 @@ def __getattr__(self, name):
                 name in self._accessors):
             return object.__getattribute__(self, name)
         else:
-            if name in self._info_axis:
+            if self._info_axis._can_hold_identifiers_and_holds_name(name):
                 return self[name]
             return object.__getattribute__(self, name)
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2083,6 +2083,19 @@ def __getitem__(self, key):
         else:
             return result
 
+    def _can_hold_identifiers_and_holds_name(self, name):
+        """
+        Faster check for ``name in self`` when we know `name` is a Python
+        identifier (e.g. in NDFrame.__getattr__, which hits this to support
+        . key lookup). For indexes that can't hold identifiers (everything
+        but object & categorical) we just return False.
+
+        https://github.com/pandas-dev/pandas/issues/19764
+        """
+        if self.is_object() or self.is_categorical():
+            return name in self
+        return False
+
     def append(self, other):
         """
         Append a collection of Index options together

diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py
@@ -8,6 +8,11 @@
 
 class DatetimeLike(Base):
 
+    def test_can_hold_identifiers(self):
+        idx = self.create_index()
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is False
+
     def test_shift_identity(self):
 
         idx = self.create_index()

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -66,6 +66,11 @@ def generate_index_types(self, skip_index_keys=[]):
             if key not in skip_index_keys:
                 yield key, index
 
+    def test_can_hold_identifiers(self):
+        idx = self.create_index()
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is True
+
     def test_new_axis(self):
         new_index = self.dateIndex[None, :]
         assert new_index.ndim == 2

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -33,6 +33,11 @@ def create_index(self, categories=None, ordered=False):
         return CategoricalIndex(
             list('aabbca'), categories=categories, ordered=ordered)
 
+    def test_can_hold_identifiers(self):
+        idx = self.create_index(categories=list('abcd'))
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is True
+
     def test_construction(self):
 
         ci = self.create_index(categories=list('abcd'))

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -48,6 +48,11 @@ def setup_method(self, method):
     def create_index(self):
         return self.index
 
+    def test_can_hold_identifiers(self):
+        idx = self.create_index()
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is True
+
     def test_boolean_context_compat2(self):
 
         # boolean context compat

diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -64,6 +64,11 @@ def test_index_rdiv_timedelta(self, scalar_td, index):
 
 class Numeric(Base):
 
+    def test_can_hold_identifiers(self):
+        idx = self.create_index()
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is False
+
     def test_numeric_compat(self):
         pass  # override Base method
 

diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
@@ -44,6 +44,11 @@ def check_binop(self, ops, scalars, idxs):
                     expected = op(Int64Index(idx), scalar)
                     tm.assert_index_equal(result, expected)
 
+    def test_can_hold_identifiers(self):
+        idx = self.create_index()
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is False
+
     def test_binops(self):
         ops = [operator.add, operator.sub, operator.mul, operator.floordiv,
                operator.truediv]