Skip to content

Commit

Permalink
PERF: Faster Series.__getattribute__ (#20834)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored and jorisvandenbossche committed May 1, 2018
1 parent f799916 commit 28edd06
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 1 deletion.
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,16 @@ def setup(self):

def time_dir_strings(self):
    """Benchmark calling ``dir()`` on the object stored in ``self.s``."""
    dir(self.s)


class SeriesGetattr(object):
    """Benchmark ``getattr`` on a Series indexed by a large ``date_range``.

    See https://github.com/pandas-dev/pandas/issues/19764
    """
    goal_time = 0.2

    def setup(self):
        # Build a one-million-row, second-frequency index starting at
        # 2012-01-01 and hang a constant-valued Series off it.
        n_periods = int(1e6)
        stamps = date_range("2012-01-01", freq='s', periods=n_periods)
        self.s = Series(1, index=stamps)

    def time_series_datetimeindex_repr(self):
        # Timed operation: look up the name 'a' with a ``None`` default.
        getattr(self.s, 'a', None)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,7 @@ Performance Improvements
- Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)
- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`)
- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`)

.. _whatsnew_0230.docs:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4375,7 +4375,7 @@ def __getattr__(self, name):
name in self._accessors):
return object.__getattribute__(self, name)
else:
if name in self._info_axis:
if self._info_axis._can_hold_identifiers_and_holds_name(name):
return self[name]
return object.__getattribute__(self, name)

Expand Down
13 changes: 13 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2083,6 +2083,19 @@ def __getitem__(self, key):
else:
return result

def _can_hold_identifiers_and_holds_name(self, name):
"""
Faster check for ``name in self`` when we know `name` is a Python
identifier (e.g. in NDFrame.__getattr__, which hits this to support
. key lookup). For indexes that can't hold identifiers (everything
but object & categorical) we just return False.
https://github.com/pandas-dev/pandas/issues/19764
"""
if self.is_object() or self.is_categorical():
return name in self
return False

def append(self, other):
"""
Append a collection of Index options together
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

class DatetimeLike(Base):

def test_can_hold_identifiers(self):
    # GH 19764: for datetime-like indexes the check must report False,
    # even for a label taken from the index itself.
    index = self.create_index()
    first_label = index[0]
    held = index._can_hold_identifiers_and_holds_name(first_label)
    assert held is False

def test_shift_identity(self):

idx = self.create_index()
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ def generate_index_types(self, skip_index_keys=[]):
if key not in skip_index_keys:
yield key, index

def test_can_hold_identifiers(self):
    # GH 19764: a label taken from this index must be reported as held.
    index = self.create_index()
    first_label = index[0]
    held = index._can_hold_identifiers_and_holds_name(first_label)
    assert held is True

def test_new_axis(self):
new_index = self.dateIndex[None, :]
assert new_index.ndim == 2
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ def create_index(self, categories=None, ordered=False):
return CategoricalIndex(
list('aabbca'), categories=categories, ordered=ordered)

def test_can_hold_identifiers(self):
    # GH 19764: a label taken from the categorical index must be
    # reported as held.
    index = self.create_index(categories=list('abcd'))
    first_label = index[0]
    held = index._can_hold_identifiers_and_holds_name(first_label)
    assert held is True

def test_construction(self):

ci = self.create_index(categories=list('abcd'))
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def setup_method(self, method):
def create_index(self):
    """Return the index stored on this test case as ``self.index``."""
    return self.index

def test_can_hold_identifiers(self):
    # GH 19764: the first entry of the index returned by create_index()
    # must be reported as held.
    index = self.create_index()
    first_label = index[0]
    held = index._can_hold_identifiers_and_holds_name(first_label)
    assert held is True

def test_boolean_context_compat2(self):

# boolean context compat
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ def test_index_rdiv_timedelta(self, scalar_td, index):

class Numeric(Base):

def test_can_hold_identifiers(self):
    # GH 19764: for numeric indexes the check must report False, even
    # for a label taken from the index itself.
    index = self.create_index()
    first_label = index[0]
    held = index._can_hold_identifiers_and_holds_name(first_label)
    assert held is False

def test_numeric_compat(self):
pass # intentionally empty: overrides the Base method of the same name

Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def check_binop(self, ops, scalars, idxs):
expected = op(Int64Index(idx), scalar)
tm.assert_index_equal(result, expected)

def test_can_hold_identifiers(self):
    # GH 19764: the check must report False for the index returned by
    # create_index(), even for a label taken from the index itself.
    index = self.create_index()
    first_label = index[0]
    held = index._can_hold_identifiers_and_holds_name(first_label)
    assert held is False

def test_binops(self):
ops = [operator.add, operator.sub, operator.mul, operator.floordiv,
operator.truediv]
Expand Down

0 comments on commit 28edd06

Please sign in to comment.