Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: collect ops dispatch functions in one place, try to de-duplicate SparseDataFrame methods #23060

Merged
merged 17 commits into from
Oct 28, 2018
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
c01c19a
collect dispatch functions in one place
jbrockmendel Oct 9, 2018
f0e0a4e
remove unused try_cast args; try to make SparseDataFrame methods more…
jbrockmendel Oct 9, 2018
30f3737
Use align methods in SparseDataFrame methods to move towards sharing …
jbrockmendel Oct 9, 2018
5f9d111
typo fixup
jbrockmendel Oct 9, 2018
f236663
fixup copy/paste mistake
jbrockmendel Oct 9, 2018
1a556bc
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 9, 2018
bcb1c35
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 10, 2018
1c9b86b
keep collecting arithmetic tests
jbrockmendel Oct 10, 2018
a2d1a56
keep collecting Series arith tests
jbrockmendel Oct 10, 2018
27c40cb
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 10, 2018
9835825
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 21, 2018
9737aee
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 23, 2018
945beb2
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 23, 2018
ecaac45
fixup duplicate import
jbrockmendel Oct 23, 2018
1d08646
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 25, 2018
11219fe
Merge branch 'master' of https://github.com/pandas-dev/pandas into fa…
jbrockmendel Oct 28, 2018
c431373
post-merge cleanup
jbrockmendel Oct 28, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4971,7 +4971,7 @@ def _combine_match_index(self, other, func, level=None):
index=left.index, columns=self.columns,
copy=False)

def _combine_match_columns(self, other, func, level=None, try_cast=True):
def _combine_match_columns(self, other, func, level=None):
assert isinstance(other, Series)
left, right = self.align(other, join='outer', axis=1, level=level,
copy=False)
Expand Down
270 changes: 134 additions & 136 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,134 @@ def should_series_dispatch(left, right, op):
return False


def dispatch_to_series(left, right, func, str_rep=None, axis=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.

Parameters
----------
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None
axis : {None, 0, 1, "index", "columns"}

Returns
-------
DataFrame
"""
# Note: we use iloc to access columns for compat with cases
# with non-unique columns.
import pandas.core.computation.expressions as expressions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this be imported at the top?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure, but I think this is a run-time import to make `import pandas as pd` faster.


right = lib.item_from_zerodim(right)
if lib.is_scalar(right):

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries) and axis == "columns":
# We only get here if called via left._combine_match_columns,
# in which case we specifically want to operate row-by-row
assert right.index.equals(left.columns)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries):
assert right.index.equals(left.index) # Handle other cases later

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
result.columns = left.columns
return result


def dispatch_to_index_op(op, left, right, index_class):
    """
    Evaluate ``op(left, right)`` by boxing the Series ``left`` in
    ``index_class`` and letting the index implementation do the work.
    DatetimeIndex and TimedeltaIndex perform type checking, timezone
    handling, overflow checks, etc.

    Parameters
    ----------
    op : binary operator (operator.add, operator.sub, ...)
    left : Series
    right : object
    index_class : DatetimeIndex or TimedeltaIndex

    Returns
    -------
    result : object, usually DatetimeIndex, TimedeltaIndex, or Series

    Raises
    ------
    TypeError
        If the index-level operation raises NullFrequencyError.
    """
    boxed = index_class(left)

    # For datetime64[tz] dtypes the boxed index may inherit a freq from a
    # cached DatetimeIndex; drop it so that integer add/sub is not
    # accidentally allowed.  See discussion in GH#19147.
    has_freq = getattr(boxed, 'freq', None) is not None
    if has_freq:
        boxed = boxed._shallow_copy(freq=None)

    try:
        return op(boxed, right)
    except NullFrequencyError:
        # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
        # on add/sub of integers (or int-like).  Surface it as a TypeError.
        raise TypeError('incompatible type for a datetime/timedelta '
                        'operation [{name}]'.format(name=op.__name__))


def dispatch_to_extension_op(op, left, right):
    """
    Apply ``op`` to ``left`` and ``right``, assuming that at least one of
    them is a Series backed by an ExtensionArray.

    Series and Index operands are unboxed to their ``_values`` before the
    operator is called; any other operand is passed through unchanged.

    Parameters
    ----------
    op : binary operator
    left : object
    right : object

    Returns
    -------
    The output of ``_construct_divmod_result`` when ``op`` is named
    'divmod' or 'rdivmod', otherwise the output of ``_construct_result``.
    Both are called with ``left``, the raw result, ``left.index`` and the
    name given by ``get_op_result_name(left, right)``.
    """
    boxed_types = (ABCSeries, ABCIndexClass)

    # unbox Series and Index to arrays
    new_left = left._values if isinstance(left, boxed_types) else left
    new_right = right._values if isinstance(right, boxed_types) else right

    # The op call will raise TypeError if the op is not defined
    # on the ExtensionArray
    res_values = op(new_left, new_right)
    res_name = get_op_result_name(left, right)

    if op.__name__ in ['divmod', 'rdivmod']:
        build_result = _construct_divmod_result
    else:
        build_result = _construct_result

    return build_result(left, res_values, left.index, res_name)


# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
# methods
Expand Down Expand Up @@ -1202,36 +1330,6 @@ def _construct_divmod_result(left, result, index, name, dtype=None):
)


def dispatch_to_extension_op(op, left, right):
    """
    Apply ``op`` to ``left`` and ``right``, assuming that at least one of
    them is a Series backed by an ExtensionArray.

    Series and Index operands are unboxed to their ``_values`` before the
    operator is called; any other operand is passed through unchanged.

    Parameters
    ----------
    op : binary operator
    left : object
    right : object

    Returns
    -------
    The output of ``_construct_divmod_result`` when ``op`` is named
    'divmod' or 'rdivmod', otherwise the output of ``_construct_result``.
    Both are called with ``left``, the raw result, ``left.index`` and the
    name given by ``get_op_result_name(left, right)``.
    """
    boxed_types = (ABCSeries, ABCIndexClass)

    # unbox Series and Index to arrays
    new_left = left._values if isinstance(left, boxed_types) else left
    new_right = right._values if isinstance(right, boxed_types) else right

    # The op call will raise TypeError if the op is not defined
    # on the ExtensionArray
    res_values = op(new_left, new_right)
    res_name = get_op_result_name(left, right)

    if op.__name__ in ['divmod', 'rdivmod']:
        build_result = _construct_divmod_result
    else:
        build_result = _construct_result

    return build_result(left, res_values, left.index, res_name)


def _arith_method_SERIES(cls, op, special):
"""
Wrapper function for Series arithmetic operations, to avoid
Expand Down Expand Up @@ -1329,40 +1427,6 @@ def wrapper(left, right):
return wrapper


def dispatch_to_index_op(op, left, right, index_class):
    """
    Evaluate ``op(left, right)`` by boxing the Series ``left`` in
    ``index_class`` and letting the index implementation do the work.
    DatetimeIndex and TimedeltaIndex perform type checking, timezone
    handling, overflow checks, etc.

    Parameters
    ----------
    op : binary operator (operator.add, operator.sub, ...)
    left : Series
    right : object
    index_class : DatetimeIndex or TimedeltaIndex

    Returns
    -------
    result : object, usually DatetimeIndex, TimedeltaIndex, or Series

    Raises
    ------
    TypeError
        If the index-level operation raises NullFrequencyError.
    """
    boxed = index_class(left)

    # For datetime64[tz] dtypes the boxed index may inherit a freq from a
    # cached DatetimeIndex; drop it so that integer add/sub is not
    # accidentally allowed.  See discussion in GH#19147.
    has_freq = getattr(boxed, 'freq', None) is not None
    if has_freq:
        boxed = boxed._shallow_copy(freq=None)

    try:
        return op(boxed, right)
    except NullFrequencyError:
        # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
        # on add/sub of integers (or int-like).  Surface it as a TypeError.
        raise TypeError('incompatible type for a datetime/timedelta '
                        'operation [{name}]'.format(name=op.__name__))


def _comp_method_OBJECT_ARRAY(op, x, y):
if isinstance(y, list):
y = construct_1d_object_array_from_listlike(y)
Expand Down Expand Up @@ -1661,72 +1725,9 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# -----------------------------------------------------------------------------
# DataFrame

def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Compute the frame operation ``func(left, right)`` one column at a
    time, so that each piece goes through the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
        A DataFrame must be indexed the same as ``left``.  A Series must
        have an index equal to ``left.columns`` when ``axis == "columns"``
        and equal to ``left.index`` otherwise.
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}
        Only consulted when ``right`` is a Series; the value "columns"
        selects row-by-row operation.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``right`` is not a scalar, DataFrame or Series.
    """
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    # Note: every column_op below addresses columns positionally via iloc
    # for compat with cases with non-unique columns.
    if lib.is_scalar(right):

        def column_op(a, b):
            ncols = len(a.columns)
            return {pos: func(a.iloc[:, pos], b) for pos in range(ncols)}

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def column_op(a, b):
            ncols = len(a.columns)
            return {pos: func(a.iloc[:, pos], b.iloc[:, pos])
                    for pos in range(ncols)}

    elif isinstance(right, ABCSeries):
        if axis == "columns":
            # We only get here if called via left._combine_match_columns,
            # in which case we specifically want to operate row-by-row
            assert right.index.equals(left.columns)

            def column_op(a, b):
                ncols = len(a.columns)
                return {pos: func(a.iloc[:, pos], b.iloc[pos])
                        for pos in range(ncols)}

        else:
            assert right.index.equals(left.index)  # Handle other cases later

            def column_op(a, b):
                ncols = len(a.columns)
                return {pos: func(a.iloc[:, pos], b)
                        for pos in range(ncols)}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    column_results = expressions.evaluate(column_op, str_rep, left, right)

    out = left._constructor(column_results, index=left.index, copy=False)
    # Pin columns instead of passing to constructor for compat with
    # non-unique columns case
    out.columns = left.columns
    return out


def _combine_series_frame(self, other, func, fill_value=None, axis=None,
level=None, try_cast=True):
level=None):
"""
Apply binary operator `func` to self, other using alignment and fill
conventions determined by the fill_value, axis, and level kwargs.
Expand All @@ -1739,7 +1740,6 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None,
fill_value : object, default None
axis : {0, 1, 'columns', 'index', None}, default None
level : int or None, default None
try_cast : bool, default True

Returns
-------
Expand All @@ -1754,8 +1754,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None,
if axis == 0:
return self._combine_match_index(other, func, level=level)
else:
return self._combine_match_columns(other, func, level=level,
try_cast=try_cast)
return self._combine_match_columns(other, func, level=level)
else:
if not len(other):
return self * np.nan
Expand All @@ -1766,8 +1765,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None,
columns=self.columns)

# default axis is columns
return self._combine_match_columns(other, func, level=level,
try_cast=try_cast)
return self._combine_match_columns(other, func, level=level)


def _align_method_FRAME(left, right, axis):
Expand Down Expand Up @@ -1867,7 +1865,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
pass_op = op if axis in [0, "columns", None] else na_op
return _combine_series_frame(self, other, pass_op,
fill_value=fill_value, axis=axis,
level=level, try_cast=True)
level=level)
else:
if fill_value is not None:
self = self.fillna(fill_value)
Expand Down Expand Up @@ -1909,7 +1907,7 @@ def f(self, other, axis=default_axis, level=None):
elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, na_op,
fill_value=None, axis=axis,
level=level, try_cast=False)
level=level)
else:
return self._combine_const(other, na_op, try_cast=False)

Expand Down Expand Up @@ -1937,7 +1935,7 @@ def f(self, other):
elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, func,
fill_value=None, axis=None,
level=None, try_cast=False)
level=None)
else:

# straight boolean comparisons we want to allow all columns
Expand Down
Loading