Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix (Series|DataFrame).interpolate for datetime dtypes #19291

Closed
wants to merge 6 commits into from
Closed
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ Conversion


-
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` where ``dtype='datetime64[ns]'`` series and columns were ignored. (:issue:`19199`)
- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)
- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`)
- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5151,8 +5151,11 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
raise ValueError("Only `method=linear` interpolation is supported "
"on MultiIndexes.")

if _maybe_transposed_self._data.get_dtype_counts().get(
'object') == len(_maybe_transposed_self.T):
dtype_counts = _maybe_transposed_self._data.get_dtype_counts()
if ('object' in dtype_counts and
dtype_counts.get('object') == len(_maybe_transposed_self.T)):
# Checking for 'object' first lets us avoid the sometimes-fragile transpose
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huh? where are you testing this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a workaround until #19198 is fixed:

dti = pd.date_range('2016-01-01', periods=3, tz='US/Pacific').insert(1, pd.NaT)
ser = pd.Series(dti)
df = ser.to_frame()

>>> df.interpolate()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pandas/core/generic.py", line 5155, in interpolate
    'object') == len(_maybe_transposed_self.T):
  File "pandas/core/frame.py", line 1941, in transpose
    return super(DataFrame, self).transpose(1, 0, **kwargs)
  File "pandas/core/generic.py", line 616, in transpose
    new_values = self.values.transpose(axes_numbers)
  File "pandas/core/base.py", line 701, in transpose
    nv.validate_transpose(args, kwargs)
  File "pandas/compat/numpy/function.py", line 54, in __call__
    self.defaults)
  File "pandas/util/_validators.py", line 218, in validate_args_and_kwargs
    validate_kwargs(fname, kwargs, compat_args)
  File "pandas/util/_validators.py", line 157, in validate_kwargs
    _check_for_default_values(fname, kwds, compat_args)
  File "pandas/util/_validators.py", line 69, in _check_for_default_values
    format(fname=fname, arg=key)))
ValueError: the 'axes' parameter is not supported in the pandas implementation of transpose()

(ser.interpolate doesn't raise, just forgets to interpolate)

# call GH#19198
raise TypeError("Cannot interpolate with all NaNs.")

# create/use the index
Expand Down
111 changes: 91 additions & 20 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ def check_int_bool(self, inplace):
# a fill na type method
try:
m = missing.clean_fill_method(method)
except:
except ValueError:
m = None

if m is not None:
Expand All @@ -1123,7 +1123,7 @@ def check_int_bool(self, inplace):
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
except:
except ValueError:
m = None

if m is not None:
Expand Down Expand Up @@ -1182,24 +1182,9 @@ def _interpolate(self, method=None, index=None, values=None,
if fill_value is None:
fill_value = self.fill_value

if method in ('krogh', 'piecewise_polynomial', 'pchip'):
if not index.is_monotonic:
raise ValueError("{0} interpolation requires that the "
"index be monotonic.".format(method))
# process 1-d slices in the axis direction

def func(x):

# process a 1-d slice, returning it
# should the axis argument be handled below in apply_along_axis?
# i.e. not an arg to missing.interpolate_1d
return missing.interpolate_1d(index, x, method=method, limit=limit,
limit_direction=limit_direction,
fill_value=fill_value,
bounds_error=False, **kwargs)

# interp each column independently
interp_values = np.apply_along_axis(func, axis, data)
interp_values = _interpolate_values(method, data, index, axis,
limit, limit_direction,
fill_value, **kwargs)

blocks = [self.make_block(interp_values, klass=self.__class__,
fastpath=True)]
Expand Down Expand Up @@ -2594,6 +2579,32 @@ def set(self, locs, values, check=False):

self.values[locs] = values

def _interpolate(self, method=None, index=None, values=None,
                 fill_value=None, axis=0, limit=None,
                 limit_direction='forward', inplace=False, downcast=None,
                 mgr=None, **kwargs):
    """
    Interpolate missing datetime64 values via a float64 round trip.

    The block's values are cast to float64 with missing entries replaced
    by NaN, handed to ``_interpolate_values``, and cast back to
    ``self.dtype``.

    Parameters
    ----------
    method, index, axis, limit, limit_direction, **kwargs
        Forwarded unchanged to ``_interpolate_values``.
    values, mgr
        Accepted for signature compatibility; not used here.
    fill_value : optional
        Defaults to ``self.fill_value`` when None.
    inplace : bool
        Validated with ``validate_bool_kwarg``. The float64 cast always
        produces a new array, so ``self.values`` is never modified.
    downcast : optional
        Forwarded to ``self._maybe_downcast``.

    Returns
    -------
    The result of ``self._maybe_downcast`` applied to a single new block
    holding the interpolated values.
    """
    # Validation only: ``astype`` below always allocates, so there is no
    # point in copying ``self.values`` first when ``inplace`` is False.
    inplace = validate_bool_kwarg(inplace, 'inplace')

    # The interpolation routines work on floats with NaN marking the gaps.
    mask = isna(self.values)
    data = self.values.astype(np.float64)
    data[mask] = np.nan

    if fill_value is None:
        fill_value = self.fill_value

    interp_values = _interpolate_values(method, data, index, axis,
                                        limit, limit_direction,
                                        fill_value, **kwargs)

    # Positions left unfilled (e.g. because of ``limit``) must come back as
    # NaT; set them explicitly rather than relying on a NaN -> datetime cast.
    unfilled = np.isnan(interp_values)
    interp_values[unfilled] = 0
    interp_values = interp_values.astype(self.dtype)
    interp_values[unfilled] = np.datetime64('NaT')

    # float64 cannot represent every nanosecond epoch exactly, so put the
    # original, non-missing timestamps back to avoid perturbing them.
    interp_values[~mask] = self.values[~mask]

    blocks = [self.make_block(interp_values, klass=self.__class__,
                              fastpath=True)]
    return self._maybe_downcast(blocks, downcast)


class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
""" implement a datetime64 block with a tz attribute """
Expand Down Expand Up @@ -2750,6 +2761,43 @@ def concat_same_type(self, to_concat, placement=None):
return make_block(
values, placement=placement or slice(0, len(values), 1))

def _interpolate(self, method=None, index=None, values=None,
                 fill_value=None, axis=0, limit=None,
                 limit_direction='forward', inplace=False, downcast=None,
                 mgr=None, **kwargs):
    """
    Interpolate missing tz-aware datetime values via a float64 round trip.

    The values are converted to UTC, reshaped to ``self.shape``, cast to
    float64 with missing entries replaced by NaN, and handed to
    ``_interpolate_values``. The result is rebuilt with ``self._holder`` in
    UTC and converted back to the original timezone.

    Parameters
    ----------
    method, index, axis, limit, limit_direction, **kwargs
        Forwarded unchanged to ``_interpolate_values``.
    values, mgr
        Accepted for signature compatibility; not used here.
    fill_value : optional
        Defaults to ``self.fill_value`` when None.
    inplace : bool
        Validated with ``validate_bool_kwarg``. The conversions below
        always produce new arrays, so ``self.values`` is never modified.
    downcast : optional
        Forwarded to ``self._maybe_downcast``.

    Returns
    -------
    The result of ``self._maybe_downcast`` applied to a single new block
    holding the interpolated values.
    """
    # Validation only: every step below allocates a fresh array.
    inplace = validate_bool_kwarg(inplace, 'inplace')

    mask = isna(self.values)

    # Interpolate in UTC so that the arithmetic is on absolute instants.
    utc_values = self.values.tz_convert('UTC').values

    # The holder is 1-d, but the interpolation expects ndim to match
    # self.ndim.
    utc_values = utc_values.reshape(self.shape)
    mask = mask.reshape(self.shape)

    # The interpolation routines work on floats with NaN marking the gaps.
    data = utc_values.astype(np.float64)
    data[mask] = np.nan

    if fill_value is None:
        fill_value = self.fill_value

    interp_values = _interpolate_values(method, data, index, axis,
                                        limit, limit_direction,
                                        fill_value, **kwargs)

    # Positions left unfilled (e.g. because of ``limit``) must come back as
    # NaT; set them explicitly rather than relying on a NaN -> datetime cast.
    unfilled = np.isnan(interp_values)
    interp_values[unfilled] = 0
    interp_values = interp_values.astype(utc_values.dtype)
    interp_values[unfilled] = np.datetime64('NaT')

    # float64 cannot represent every nanosecond epoch exactly, so put the
    # original, non-missing timestamps back to avoid perturbing them.
    interp_values[~mask] = utc_values[~mask]

    # ravel() rather than squeeze(): squeeze() would turn a single-element
    # result into a 0-d array, which is not a valid input for the holder.
    utc_index = self._holder(interp_values.ravel(), tz='UTC')
    interp_values = utc_index.tz_convert(self.values.tz)

    blocks = [self.make_block(interp_values, klass=self.__class__,
                              fastpath=True)]
    return self._maybe_downcast(blocks, downcast)


class SparseBlock(NonConsolidatableMixIn, Block):
""" implement as a list of sparse arrays of the same dtype """
Expand Down Expand Up @@ -5671,3 +5719,26 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
if not allow_fill:
indexer = maybe_convert_indices(indexer, length)
return 'fancy', indexer, len(indexer)


def _interpolate_values(method, data, index, axis, limit, limit_direction,
                        fill_value, **kwargs):
    """
    Interpolate ``data`` along ``axis``, one 1-d slice at a time.

    Each slice is passed to ``missing.interpolate_1d`` together with
    ``index`` and the interpolation options; ``bounds_error`` is always
    False.

    Raises
    ------
    ValueError
        If ``method`` is 'krogh', 'piecewise_polynomial' or 'pchip' and
        ``index.is_monotonic`` is falsy.
    """
    needs_monotonic = method in ('krogh', 'piecewise_polynomial', 'pchip')
    if needs_monotonic and not index.is_monotonic:
        raise ValueError("{0} interpolation requires that the "
                         "index be monotonic.".format(method))

    def _fill_slice(values_1d):
        # Handles a single 1-d slice taken along ``axis``.
        # Open question kept from the original: should ``axis`` be dealt
        # with only by apply_along_axis, i.e. never be an argument of
        # missing.interpolate_1d?
        return missing.interpolate_1d(index, values_1d, method=method,
                                      limit=limit,
                                      limit_direction=limit_direction,
                                      fill_value=fill_value,
                                      bounds_error=False, **kwargs)

    # Every slice along ``axis`` is interpolated independently.
    return np.apply_along_axis(_fill_slice, axis, data)
13 changes: 13 additions & 0 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,3 +816,16 @@ def test_interp_ignore_all_good(self):
# all good
result = df[['B', 'D']].interpolate(downcast=None)
assert_frame_equal(result, df[['B', 'D']])

@pytest.mark.parametrize('tz', [None, 'US/Central'])
def test_interpolate_dt64_values(self, tz):
    # Three known dates at labels 23, 26 and 30; reindexing over 23..30
    # introduces the gaps that interpolate() is expected to fill.
    known_dates = pd.DatetimeIndex(
        ['2015-09-23', '2015-09-26', '2015-09-30'], tz=tz)
    sparse_labels = pd.Index([23, 26, 30])
    df = DataFrame(known_dates, index=sparse_labels)
    df = df.reindex(range(23, 31))

    result = df.interpolate()

    # One row per label, i.e. the full daily range between the end points.
    daily = pd.date_range('2015-09-23', '2015-09-30', tz=tz)
    expected = DataFrame(daily, index=df.index)
    assert_frame_equal(expected, result)
13 changes: 13 additions & 0 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1278,3 +1278,16 @@ def test_series_interpolate_intraday(self):
result = ts.reindex(new_index).interpolate(method='time')

tm.assert_numpy_array_equal(result.values, exp.values)

@pytest.mark.parametrize('tz', [None, 'US/Central'])
def test_interpolate_dt64_values(self, tz):
    # Three known dates at labels 23, 26 and 30; reindexing over 23..30
    # introduces the gaps that interpolate() is expected to fill.
    known_dates = pd.DatetimeIndex(
        ['2015-09-23', '2015-09-26', '2015-09-30'], tz=tz)
    sparse_labels = pd.Index([23, 26, 30])
    ser = pd.Series(known_dates, index=sparse_labels)
    ser = ser.reindex(range(23, 31))

    result = ser.interpolate()

    # One entry per label, i.e. the full daily range between the end points.
    daily = pd.date_range('2015-09-23', '2015-09-30', tz=tz)
    expected = pd.Series(daily, index=ser.index)
    tm.assert_series_equal(expected, result)