Skip to content

Commit

Permalink
REF/TYP: use OpsMixin for DataFrame (#37044)
Browse files Browse the repository at this point in the history
* REF/TYP: use OpsMixin for arithmetic methods

* REF: separate arith_method_FRAME from flex_arith_method_FRAME

* whatsnew

* REF/TYP: use OpsMixin for logical methods (#36964)

* TST: insert 'match' to bare pytest raises in pandas/tests/tools/test_to_datetime.py (#37027)

* TST: insert 'match' to bare pytest raises in pandas/tests/test_flags.py (#37026)

Co-authored-by: Rajat Bishnoi <[email protected]>

* TYP: generic, series, frame (#36989)

* CI: pin pymysql #36465 (#36847)

* CI: unpin sql to verify the bugs #36465

* CI: pin sqlalchemy

* CI: pin pymysql

* CI: pin sqlalchemy

* CI: pin pymysql

* CI: pin pymysql

* CI: add note

* CLN/REF: de-duplicate DatetimeTZBlock.setitem (#37019)

* REF/TYP: define NDFrame numeric methods non-dynamically (#37017)

* CLN: require td64 in TimedeltaBlock (#37018)

* BUG: Raise ValueError instead of bare Exception in sanitize_array (#35769)

* CLN: collected cleanups, warning suppression in tests (#37021)

* REF/TYP: use OpsMixin for DataFrame

* CLN: remove get_op_name

* mypy fixup

* de-privatize

Co-authored-by: krajatcl <[email protected]>
Co-authored-by: Rajat Bishnoi <[email protected]>
Co-authored-by: Fangchen Li <[email protected]>
Co-authored-by: Micah Smith <[email protected]>
  • Loading branch information
5 people authored Oct 11, 2020
1 parent 4253abd commit 89cc5bf
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 276 deletions.
92 changes: 88 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
relabel_result,
transform,
)
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import Categorical, ExtensionArray
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -336,7 +337,7 @@
# DataFrame class


class DataFrame(NDFrame):
class DataFrame(NDFrame, OpsMixin):
"""
Two-dimensional, size-mutable, potentially heterogeneous tabular data.
Expand Down Expand Up @@ -5838,7 +5839,87 @@ def reorder_levels(self, order, axis=0) -> DataFrame:
return result

# ----------------------------------------------------------------------
# Arithmetic / combination related
# Arithmetic Methods

def _cmp_method(self, other, op):
    """
    Apply the comparison operator ``op`` between this frame and ``other``.

    The operands are aligned with ``ops.align_method_FRAME`` (non-flex,
    no level), the operation is evaluated by ``_dispatch_frame_op`` and
    the outcome is wrapped by ``_construct_result``.
    """
    # The axis value (1) only matters when `other` is a Series.
    left, right = ops.align_method_FRAME(self, other, 1, flex=False, level=None)

    # See GH#4537 for discussion of scalar op behavior
    compared = left._dispatch_frame_op(right, op, axis=1)
    return left._construct_result(compared)

def _arith_method(self, other, op):
    """
    Apply the arithmetic operator ``op`` between this frame and ``other``.

    When ``ops.should_reindex_frame_op`` reports that a reindex is needed,
    the whole operation is delegated to
    ``ops.frame_arith_method_with_reindex``.  Otherwise the operands are
    aligned with ``ops.align_method_FRAME`` (flex, no level), evaluated by
    ``_dispatch_frame_op`` and wrapped by ``_construct_result``.
    """
    needs_reindex = ops.should_reindex_frame_op(self, other, op, 1, 1, None, None)
    if needs_reindex:
        return ops.frame_arith_method_with_reindex(self, other, op)

    # The axis value (1) only matters when `other` is a Series.
    left, right = ops.align_method_FRAME(self, other, 1, flex=True, level=None)

    computed = left._dispatch_frame_op(right, op, axis=1)
    return left._construct_result(computed)

# Logical operators are handled by the same implementation as arithmetic ones.
_logical_method = _arith_method

def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
    """
    Evaluate the frame operation ``func(self, right)``.

    Scalars and DataFrames are handed to the block manager; a Series is
    combined with the frame one column array at a time.

    Parameters
    ----------
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1}
        Only consulted when ``right`` is a Series: ``1`` pairs each column
        with one element of ``right``; anything else applies all of
        ``right`` to every column.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``right`` is list-like but neither a DataFrame nor a Series.
    """
    # Array-level op that is applied to each column/block's values.
    array_op = ops.get_array_op(func)
    right = lib.item_from_zerodim(right)

    if not is_list_like(right):
        # i.e. scalar, faster than checking np.ndim(right) == 0
        return type(self)(self._mgr.apply(array_op, right=right))

    if isinstance(right, DataFrame):
        assert self.index.equals(right.index)
        assert self.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(self)`
        #  fails in cases with empty columns reached via
        #  frame_arith_method_with_reindex
        return type(self)(self._mgr.operate_blockwise(right._mgr, array_op))

    if not isinstance(right, Series):
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    if axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(self.columns)

        values = right._values
        # maybe_align_as_frame ensures we do not have an ndarray here
        assert not isinstance(values, np.ndarray)

        arrays = [
            array_op(column, value)
            for column, value in zip(self._iter_column_arrays(), values)
        ]
    else:
        assert right.index.equals(self.index)  # Handle other cases later
        values = right._values

        arrays = [array_op(column, values) for column in self._iter_column_arrays()]

    return type(self)._from_arrays(
        arrays, self.columns, self.index, verify_integrity=False
    )

def _combine_frame(self, other: DataFrame, func, fill_value=None):
# at this point we have `self._indexed_same(other)`
Expand All @@ -5857,7 +5938,7 @@ def _arith_op(left, right):
left, right = ops.fill_binop(left, right, fill_value)
return func(left, right)

new_data = ops.dispatch_to_series(self, other, _arith_op)
new_data = self._dispatch_frame_op(other, _arith_op)
return new_data

def _construct_result(self, result) -> DataFrame:
Expand All @@ -5879,6 +5960,9 @@ def _construct_result(self, result) -> DataFrame:
out.index = self.index
return out

# ----------------------------------------------------------------------
# Combination-Related

@Appender(
"""
Returns
Expand Down Expand Up @@ -7254,7 +7338,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
bm_axis = self._get_block_manager_axis(axis)

if bm_axis == 0 and periods != 0:
return self - self.shift(periods, axis=axis) # type: ignore[operator]
return self - self.shift(periods, axis=axis)

new_data = self._mgr.diff(n=periods, axis=bm_axis)
return self._constructor(new_data)
Expand Down
Loading

0 comments on commit 89cc5bf

Please sign in to comment.