Skip to content

Commit

Permalink
BUG GH23282 calling min on series of NaT returns NaT (pandas-dev#23289)
Browse files Browse the repository at this point in the history
  • Loading branch information
JustinZhengBC authored and Pingviinituutti committed Feb 28, 2019
1 parent 2389539 commit c5c18b3
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,7 @@ Datetimelike
- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)
- Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
- Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`)
- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)
- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`)
- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
Expand Down
38 changes: 23 additions & 15 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
elif is_float_dtype(dtype):
dtype_max = np.float64

return values, mask, dtype, dtype_max
return values, mask, dtype, dtype_max, fill_value


def _isfinite(values):
Expand All @@ -266,16 +266,21 @@ def _view_if_needed(values):
return values


def _wrap_results(result, dtype):
def _wrap_results(result, dtype, fill_value=None):
""" wrap our results if needed """

if is_datetime64_dtype(dtype):
if not isinstance(result, np.ndarray):
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
result = np.nan
result = tslibs.Timestamp(result)
else:
result = result.view(dtype)
elif is_timedelta64_dtype(dtype):
if not isinstance(result, np.ndarray):
if result == fill_value:
result = np.nan

# raise if we have a timedelta64[ns] which is too large
if np.fabs(result) > _int64_max:
Expand Down Expand Up @@ -346,8 +351,8 @@ def nanany(values, axis=None, skipna=True, mask=None):
>>> nanops.nanany(s)
False
"""
values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna,
mask=mask)
values, mask, dtype, _, _ = _get_values(values, skipna, False, copy=skipna,
mask=mask)
return values.any(axis)


Expand Down Expand Up @@ -379,8 +384,8 @@ def nanall(values, axis=None, skipna=True, mask=None):
>>> nanops.nanall(s)
False
"""
values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna,
mask=mask)
values, mask, dtype, _, _ = _get_values(values, skipna, True, copy=skipna,
mask=mask)
return values.all(axis)


Expand Down Expand Up @@ -409,7 +414,8 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
>>> nanops.nansum(s)
3.0
"""
values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask)
values, mask, dtype, dtype_max, _ = _get_values(values,
skipna, 0, mask=mask)
dtype_sum = dtype_max
if is_float_dtype(dtype):
dtype_sum = dtype
Expand Down Expand Up @@ -448,7 +454,8 @@ def nanmean(values, axis=None, skipna=True, mask=None):
>>> nanops.nanmean(s)
1.5
"""
values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask)
values, mask, dtype, dtype_max, _ = _get_values(
values, skipna, 0, mask=mask)
dtype_sum = dtype_max
dtype_count = np.float64
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype):
Expand Down Expand Up @@ -501,7 +508,7 @@ def get_median(x):
return np.nan
return np.nanmedian(x[mask])

values, mask, dtype, dtype_max = _get_values(values, skipna, mask=mask)
values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask)
if not is_float_dtype(values):
values = values.astype('f8')
values[mask] = np.nan
Expand Down Expand Up @@ -705,7 +712,8 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None):
def _nanminmax(meth, fill_value_typ):
@bottleneck_switch()
def reduction(values, axis=None, skipna=True, mask=None):
values, mask, dtype, dtype_max = _get_values(

values, mask, dtype, dtype_max, fill_value = _get_values(
values, skipna, fill_value_typ=fill_value_typ, mask=mask)

if ((axis is not None and values.shape[axis] == 0) or
Expand All @@ -719,7 +727,7 @@ def reduction(values, axis=None, skipna=True, mask=None):
else:
result = getattr(values, meth)(axis)

result = _wrap_results(result, dtype)
result = _wrap_results(result, dtype, fill_value)
return _maybe_null_out(result, axis, mask)

reduction.__name__ = 'nan' + meth
Expand Down Expand Up @@ -753,8 +761,8 @@ def nanargmax(values, axis=None, skipna=True, mask=None):
>>> nanops.nanargmax(s)
4
"""
values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf',
mask=mask)
values, mask, dtype, _, _ = _get_values(
values, skipna, fill_value_typ='-inf', mask=mask)
result = values.argmax(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand Down Expand Up @@ -783,8 +791,8 @@ def nanargmin(values, axis=None, skipna=True, mask=None):
>>> nanops.nanargmin(s)
0
"""
values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf',
mask=mask)
values, mask, dtype, _, _ = _get_values(
values, skipna, fill_value_typ='+inf', mask=mask)
result = values.argmin(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/test_datetime_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,21 @@ def test_dt_timetz_accessor(self, tz_naive_fixture):
time(22, 14, tzinfo=tz)])
result = s.dt.timetz
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('nat', [
pd.Series([pd.NaT, pd.NaT]),
pd.Series([pd.NaT, pd.Timedelta('nat')]),
pd.Series([pd.Timedelta('nat'), pd.Timedelta('nat')])])
def test_minmax_nat_series(self, nat):
# GH 23282
assert nat.min() is pd.NaT
assert nat.max() is pd.NaT

@pytest.mark.parametrize('nat', [
# GH 23282
pd.DataFrame([pd.NaT, pd.NaT]),
pd.DataFrame([pd.NaT, pd.Timedelta('nat')]),
pd.DataFrame([pd.Timedelta('nat'), pd.Timedelta('nat')])])
def test_minmax_nat_dataframe(self, nat):
assert nat.min()[0] is pd.NaT
assert nat.max()[0] is pd.NaT

0 comments on commit c5c18b3

Please sign in to comment.