From ff6d12f028f1eca38b1edb198bd833297cf2cfbb Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 2 Jan 2024 12:13:28 +0100 Subject: [PATCH 01/29] fix: Fixes wrong doctest output in `pandas.core.resample.Resampler.interpolate` and the related explanation about consideration of anchor points when interpolating downsampled series with non-aligned result index. --- pandas/core/resample.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 48a5f85e1c388..7d4ec84b8cb50 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1062,25 +1062,26 @@ def interpolate( Freq: 500ms, dtype: float64 Internal reindexing with ``asfreq()`` prior to interpolation leads to - an interpolated timeseries on the basis the reindexed timestamps (anchors). - Since not all datapoints from original series become anchors, - it can lead to misleading interpolation results as in the following example: + an interpolated timeseries on the basis of the reindexed timestamps + (anchors). 
It is assured that all available datapoints from original + series become anchors, so it also works for resampling-cases that lead + to non-aligned timestamps, as in the following example: >>> series.resample("400ms").interpolate("linear") 2023-03-01 07:00:00.000 1.0 - 2023-03-01 07:00:00.400 1.2 - 2023-03-01 07:00:00.800 1.4 - 2023-03-01 07:00:01.200 1.6 - 2023-03-01 07:00:01.600 1.8 + 2023-03-01 07:00:00.400 0.2 + 2023-03-01 07:00:00.800 -0.6 + 2023-03-01 07:00:01.200 -0.4 + 2023-03-01 07:00:01.600 0.8 2023-03-01 07:00:02.000 2.0 - 2023-03-01 07:00:02.400 2.2 - 2023-03-01 07:00:02.800 2.4 - 2023-03-01 07:00:03.200 2.6 - 2023-03-01 07:00:03.600 2.8 + 2023-03-01 07:00:02.400 1.6 + 2023-03-01 07:00:02.800 1.2 + 2023-03-01 07:00:03.200 1.4 + 2023-03-01 07:00:03.600 2.2 2023-03-01 07:00:04.000 3.0 Freq: 400ms, dtype: float64 - Note that the series erroneously increases between two anchors + Note that the series correctly decreases between two anchors ``07:00:00`` and ``07:00:02``. """ assert downcast is lib.no_default # just checking coverage From 1593af0fe8530ea994b6789e5b819ff0d6b742b3 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 2 Jan 2024 12:19:18 +0100 Subject: [PATCH 02/29] Resolved merge conflicts --- pandas/core/missing.py | 11 +++- pandas/core/resample.py | 25 +++++++- pandas/tests/resample/test_base.py | 92 ++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 0d857f6b21517..de13864bd74b6 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -304,7 +304,16 @@ def get_interp_index(method, index: Index) -> Index: # prior default from pandas import Index - index = Index(np.arange(len(index))) + if isinstance(index.dtype, DatetimeTZDtype) or lib.is_np_dtype( + index.dtype, "mM" + ): + # Convert datetime-like indexes to int64 + index = Index(index.view("i8")) + + elif not is_numeric_dtype(index.dtype): + # We keep behavior consistent with prior 
versions of pandas for + # non-numeric, non-datetime indexes + index = Index(np.arange(len(index))) else: methods = {"index", "values", "nearest", "time"} is_numeric_or_datetime = ( diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 7d4ec84b8cb50..974a0a8c08a15 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -83,6 +83,7 @@ TimedeltaIndex, timedelta_range, ) +from pandas.core.reshape.concat import concat from pandas.tseries.frequencies import ( is_subperiod, @@ -1086,7 +1087,23 @@ def interpolate( """ assert downcast is lib.no_default # just checking coverage result = self._upsample("asfreq") - return result.interpolate( + + # If the original data has timestamps which are not aligned with the + # target timestamps, we need to add those points back to the data frame + # that is supposed to be interpolated. This does not work with + # PeriodIndex, so we skip this case. + obj = self._selected_obj + is_period_index = isinstance(obj.index, PeriodIndex) + + if not is_period_index: + final_index = result.index + missing_data_points_index = obj.index.difference(final_index) + if len(missing_data_points_index) > 0: + result = concat( + [result, obj.loc[missing_data_points_index]] + ).sort_index() + + result_interpolated = result.interpolate( method=method, axis=axis, limit=limit, @@ -1097,6 +1114,12 @@ def interpolate( **kwargs, ) + # We make sure that original data points which do not align with the + # resampled index are removed + if is_period_index: + return result_interpolated + return result_interpolated.loc[final_index] + @final def asfreq(self, fill_value=None): """ diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index f20518c7be98a..555e179f7c1cf 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -21,6 +21,51 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import _asfreq_compat +# a fixture value can be 
overridden by the test parameter value. Note that the +# value of the fixture can be overridden this way even if the test doesn't use +# it directly (doesn't mention it in the function prototype). +# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501 +# in this module we override the fixture values defined in conftest.py +# tuples of '_index_factory,_series_name,_index_start,_index_end' +DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) +PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) +TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") + +all_ts = pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", + [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], +) + +all_1d_no_arg_interpolation_methods = pytest.mark.parametrize( + "method", + [ + "linear", + "time", + "index", + "values", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "krogh", + "from_derivatives", + "piecewise_polynomial", + "pchip", + "akima", + ], +) + + +@pytest.fixture +def create_index(_index_factory): + def _create_index(*args, **kwargs): + """return the _index_factory created using the args, kwargs""" + return _index_factory(*args, **kwargs) + + return _create_index + @pytest.mark.parametrize("freq", ["2D", "1h"]) @pytest.mark.parametrize( @@ -89,6 +134,53 @@ def test_resample_interpolate(index): tm.assert_frame_equal(result, expected) +@all_1d_no_arg_interpolation_methods +def test_resample_interpolate_regular_sampling_off_grid(method): + # GH#21351 + index = date_range("2000-01-01 00:01:00", periods=5, freq="2h") + ser = Series(np.arange(5.0), index) + + # Resample to 1 hour sampling and interpolate with the given method + ser_resampled = ser.resample("1h").interpolate(method) + + # Check that none of the resampled values are NaN, except the first one + # which lies 1 minute before the first actual data 
point + assert np.isnan(ser_resampled.iloc[0]) + assert not ser_resampled.iloc[1:].isna().any() + + if method not in ["nearest", "zero"]: + # Check that the resampled values are close to the expected values + # except for methods with known inaccuracies + assert np.all( + np.isclose(ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1) + ) + + +@all_1d_no_arg_interpolation_methods +def test_resample_interpolate_irregular_sampling(method): + # GH#21351 + ser = Series( + np.linspace(0.0, 1.0, 5), + index=DatetimeIndex( + [ + "2000-01-01 00:00:03", + "2000-01-01 00:00:22", + "2000-01-01 00:00:24", + "2000-01-01 00:00:31", + "2000-01-01 00:00:39", + ] + ), + ) + + # Resample to 5 second sampling and interpolate with the given method + ser_resampled = ser.resample("5s").interpolate(method) + + # Check that none of the resampled values are NaN, except the first one + # which lies 3 seconds before the first actual data point + assert np.isnan(ser_resampled.iloc[0]) + assert not ser_resampled.iloc[1:].isna().any() + + def test_raises_on_non_datetimelike_index(): # this is a non datetimelike index xp = DataFrame() From db68c2d4c42c3498bde2a0c2851bbdf9799bbb54 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Fri, 15 Dec 2023 11:54:13 +0100 Subject: [PATCH 03/29] fix: Fixes wrong test case assumption for interpolation Fixes assumption in `test_interp_basic_with_non_range_index`. If the index is [1, 2, 3, 5] and values are [1, 2, np.nan, 4], it is wrong to expect that interpolation will result in 3 for the missing value in case of linear interpolation. It will rather be 2.666... 
--- pandas/tests/frame/methods/test_interpolate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index e377fdd635bfe..2ce385d45ad11 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -127,7 +127,7 @@ def test_interp_basic_with_non_range_index(self, using_infer_string): with tm.assert_produces_warning(warning, match=msg): result = df.set_index("C").interpolate() expected = df.set_index("C") - expected.loc[3, "A"] = 3 + expected.loc[3, "A"] = 2.66667 expected.loc[5, "B"] = 9 tm.assert_frame_equal(result, expected) From dd8b8d39dd19e56772e20c26188731c373e28c11 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Fri, 15 Dec 2023 12:36:24 +0100 Subject: [PATCH 04/29] fix: Make sure frequency indexes are preserved with new interpolation approach --- pandas/core/resample.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 974a0a8c08a15..37808f3f8d197 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1118,7 +1118,11 @@ def interpolate( # resampled index are removed if is_period_index: return result_interpolated - return result_interpolated.loc[final_index] + + result_interpolated = result_interpolated.loc[final_index] + # This is to make sure that frequency indexes are preserved + result_interpolated.index = final_index + return result_interpolated @final def asfreq(self, fill_value=None): From a04a3a251ba668d7ef53d354bf12a86a7bd0e8a7 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 21 Dec 2023 19:58:29 +0100 Subject: [PATCH 05/29] fix: Fixes new-style up-sampling interpolation for MultiIndexes resulting from groupby-operations --- pandas/core/resample.py | 27 ++++++++++- pandas/tests/resample/test_time_grouper.py | 55 ++++++++++++++++++---- 2 files changed, 70 insertions(+), 12 deletions(-) diff 
--git a/pandas/core/resample.py b/pandas/core/resample.py index 37808f3f8d197..ac16b7191ae28 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1091,12 +1091,35 @@ def interpolate( # If the original data has timestamps which are not aligned with the # target timestamps, we need to add those points back to the data frame # that is supposed to be interpolated. This does not work with - # PeriodIndex, so we skip this case. + # PeriodIndex, so we skip this case. GH#21351 obj = self._selected_obj is_period_index = isinstance(obj.index, PeriodIndex) + # Skip this step for PeriodIndex if not is_period_index: final_index = result.index + if isinstance(final_index, MultiIndex): + # MultiIndex case: the `self._selected_obj` is the object before + # the groupby that led to this MultiIndex, so that the index + # is not directly available. We reconstruct it by obtaining the + # groupby columns from the final index, but assuming that the + # name of the datetime index is not included... + group_columns = list( + set(final_index.names).difference({obj.index.name}) + ) + + # ... To obtain a DataFrame with the groupby columns and the + # datetime index, we need to reset the index and groupby again, + # then apply the (cheap) first-aggregator. + obj = ( + obj.reset_index().groupby(group_columns + [obj.index.name]).first() + ) + + # The value columns that became index levels have to be added + # back manually. This is not ideal performance-wise. 
+ for column in group_columns: + obj[column] = obj.index.get_level_values(column) + missing_data_points_index = obj.index.difference(final_index) if len(missing_data_points_index) > 0: result = concat( @@ -1120,7 +1143,7 @@ def interpolate( return result_interpolated result_interpolated = result_interpolated.loc[final_index] - # This is to make sure that frequency indexes are preserved + # We make sure frequency indexes are preserved result_interpolated.index = final_index return result_interpolated diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 3d9098917a12d..5127764f14b7d 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -337,21 +337,19 @@ def test_upsample_sum(method, method_args, expected_values): tm.assert_series_equal(result, expected) -def test_groupby_resample_interpolate(): - # GH 35325 - d = {"price": [10, 11, 9], "volume": [50, 60, 50]} - - df = DataFrame(d) - +@pytest.fixture +def groupy_test_df(): + df = DataFrame({"price": [10, 11, 9], "volume": [50, 60, 50]}) df["week_starting"] = date_range("01/01/2018", periods=3, freq="W") + return df.set_index("week_starting") + +def test_groupby_resample_interpolate(groupy_test_df): + # GH 35325 msg = "DataFrameGroupBy.resample operated on the grouping columns" with tm.assert_produces_warning(FutureWarning, match=msg): result = ( - df.set_index("week_starting") - .groupby("volume") - .resample("1D") - .interpolate(method="linear") + groupy_test_df.groupby("volume").resample("1D").interpolate(method="linear") ) volume = [50] * 15 + [60] @@ -388,3 +386,40 @@ def test_groupby_resample_interpolate(): index=expected_ind, ) tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_interpolate_off_grid(groupy_test_df): + """Similar test as test_groupby_resample_interpolate but with resampling + that results in missing anchor points when interpolating. 
See GH#21351.""" + # GH#21351 + msg = "DataFrameGroupBy.resample operated on the grouping columns" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ( + groupy_test_df.groupby("volume") + .resample("265H") + .interpolate(method="linear") + ) + + volume = [50, 50, 60] + week_starting = [ + Timestamp("2018-01-07"), + Timestamp("2018-01-18 01:00:00"), + Timestamp("2018-01-14"), + ] + expected_ind = pd.MultiIndex.from_arrays( + [volume, week_starting], + names=["volume", "week_starting"], + ) + + expected = DataFrame( + data={ + "price": [ + 10.0, + 9.5, + 11.0, + ], + "volume": np.array(volume).astype(float), + }, + index=expected_ind, + ) + tm.assert_frame_equal(result, expected) From efbba10dbee85c8ad191717b97d1d573bcb7d9a3 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 2 Jan 2024 09:50:06 +0100 Subject: [PATCH 06/29] fix: Fixes wrong test case assumption when using linear interpolation on series with datetime index using business days only (test case `pandas.tests.series.methods.test_interpolate.TestSeriesInterpolateData.test_interpolate`). --- pandas/tests/series/methods/test_interpolate.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index d854f0b787759..050b127af2b2b 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -94,7 +94,12 @@ def test_interpolate(self, datetime_series): ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) ts_copy = ts.copy() - ts_copy[5:10] = np.nan + + # Set data between Tuesday and Thursday to NaN for 2 consecutive weeks. + # Linear interpolation should fill in the missing values correctly, + # as the index is equally-spaced within each week. 
+ ts_copy[1:4] = np.nan + ts_copy[6:9] = np.nan linear_interp = ts_copy.interpolate(method="linear") tm.assert_series_equal(linear_interp, ts) From 02944646affee125c8d32809699511156e7adead Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 2 Jan 2024 09:52:55 +0100 Subject: [PATCH 07/29] fix: Fixes wrong test case assumption when using linear interpolation on irregular index (test case `pandas.tests.series.methods.test_interpolate.TestSeriesInterpolateData.test_nan_irregular_index`). --- pandas/tests/series/methods/test_interpolate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 050b127af2b2b..a0f8b03bc480b 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -271,7 +271,7 @@ def test_nan_interpolate(self, kwargs): def test_nan_irregular_index(self): s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) result = s.interpolate() - expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) + expected = Series([1.0, 2.0, 2.6666666666666665, 4.0], index=[1, 3, 5, 9]) tm.assert_series_equal(result, expected) def test_nan_str_index(self): From 537f8bfcf035e62c8cb780c181d784130d291014 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 2 Jan 2024 13:41:23 +0100 Subject: [PATCH 08/29] fix: Adds test skips for interpolation methods that require scipy if scipy is not installed --- pandas/tests/resample/test_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 555e179f7c1cf..6c579bc825947 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -136,6 +136,7 @@ def test_resample_interpolate(index): @all_1d_no_arg_interpolation_methods def test_resample_interpolate_regular_sampling_off_grid(method): + pytest.importorskip("scipy") # GH#21351 index = date_range("2000-01-01 
00:01:00", periods=5, freq="2h") ser = Series(np.arange(5.0), index) @@ -158,6 +159,7 @@ def test_resample_interpolate_regular_sampling_off_grid(method): @all_1d_no_arg_interpolation_methods def test_resample_interpolate_irregular_sampling(method): + pytest.importorskip("scipy") # GH#21351 ser = Series( np.linspace(0.0, 1.0, 5), From 4f78c75a6fb53c549ccda13de89859cdfcf3b3c8 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 6 Mar 2024 10:31:41 +0100 Subject: [PATCH 09/29] fix: Makes sure keyword arguments "downcast" is not passed to scipy interpolation methods that are not using `interp1d` or spline. --- pandas/core/missing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 8b58143e0a233..72f6f5020ab25 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -622,6 +622,9 @@ def _interpolate_scipy_wrapper( if not new_x.flags.writeable: new_x = new_x.copy() terp = alt_methods[method] + + # Make sure downcast is not in kwargs for alt methods + kwargs.pop("downcast", None) new_y = terp(x, y, new_x, **kwargs) return new_y From a5bcd45f9b72b4831528f2fc2c803a2d73bf2887 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 6 Mar 2024 10:33:51 +0100 Subject: [PATCH 10/29] fix: Adjusted expected warning type in `test_groupby_resample_interpolate_off_grid`. --- pandas/tests/resample/test_time_grouper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 5e0141efcdf66..8c9afc5d628b1 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -389,10 +389,10 @@ def test_groupby_resample_interpolate_off_grid(groupy_test_df): that results in missing anchor points when interpolating. 
See GH#21351.""" # GH#21351 msg = "DataFrameGroupBy.resample operated on the grouping columns" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = ( groupy_test_df.groupby("volume") - .resample("265H") + .resample("265h") .interpolate(method="linear") ) From 7d4b4cefc4195d552b6394aca66faa6e81f653bb Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 6 Mar 2024 12:19:56 +0100 Subject: [PATCH 11/29] fix: Fixes failing interpolation on groupby if the index has `name`=None. Adds this check to an existing test case. --- pandas/core/resample.py | 5 +- pandas/tests/resample/test_time_grouper.py | 82 ++++++++++++---------- 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 68e39df4e7f83..faf6ae30f7d94 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -941,9 +941,8 @@ def interpolate( # ... To obtain a DataFrame with the groupby columns and the # datetime index, we need to reset the index and groupby again, # then apply the (cheap) first-aggregator. - obj = ( - obj.reset_index().groupby(group_columns + [obj.index.name]).first() - ) + index_name = obj.index.name or "index" + obj = obj.reset_index().groupby(group_columns + [index_name]).first() # The value columns that became index levels have to be added # back manually. This is not ideal performance-wise. 
diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 8c9afc5d628b1..1b81e534b3f6a 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -342,46 +342,52 @@ def groupy_test_df(): def test_groupby_resample_interpolate(groupy_test_df): # GH 35325 - msg = "DataFrameGroupBy.resample operated on the grouping columns" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - result = ( - groupy_test_df.groupby("volume").resample("1D").interpolate(method="linear") - ) - volume = [50] * 15 + [60] - week_starting = list(date_range("2018-01-07", "2018-01-21")) + [ - Timestamp("2018-01-14") - ] - expected_ind = pd.MultiIndex.from_arrays( - [volume, week_starting], - names=["volume", "week_starting"], - ) + # Make a copy of the test data frame that has index.name=None + groupy_test_df_without_index_name = groupy_test_df.copy() + groupy_test_df_without_index_name.index.name = None - expected = DataFrame( - data={ - "price": [ - 10.0, - 9.928571428571429, - 9.857142857142858, - 9.785714285714286, - 9.714285714285714, - 9.642857142857142, - 9.571428571428571, - 9.5, - 9.428571428571429, - 9.357142857142858, - 9.285714285714286, - 9.214285714285714, - 9.142857142857142, - 9.071428571428571, - 9.0, - 11.0, - ], - "volume": [50.0] * 15 + [60], - }, - index=expected_ind, - ) - tm.assert_frame_equal(result, expected) + dfs = [groupy_test_df, groupy_test_df_without_index_name] + + for df in dfs: + msg = "DataFrameGroupBy.resample operated on the grouping columns" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = df.groupby("volume").resample("1D").interpolate(method="linear") + + volume = [50] * 15 + [60] + week_starting = list(date_range("2018-01-07", "2018-01-21")) + [ + Timestamp("2018-01-14") + ] + expected_ind = pd.MultiIndex.from_arrays( + [volume, week_starting], + names=["volume", df.index.name], + ) + + expected = DataFrame( + data={ + 
"price": [ + 10.0, + 9.928571428571429, + 9.857142857142858, + 9.785714285714286, + 9.714285714285714, + 9.642857142857142, + 9.571428571428571, + 9.5, + 9.428571428571429, + 9.357142857142858, + 9.285714285714286, + 9.214285714285714, + 9.142857142857142, + 9.071428571428571, + 9.0, + 11.0, + ], + "volume": [50.0] * 15 + [60], + }, + index=expected_ind, + ) + tm.assert_frame_equal(result, expected) def test_groupby_resample_interpolate_off_grid(groupy_test_df): From 09122499b023c5e2762022fbe0bfaeea272a65f5 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 12 Mar 2024 11:18:16 +0100 Subject: [PATCH 12/29] Trigger Actions From 0ee5b8d4cdc656e71af75079c046af6e57a2904c Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Tue, 2 Apr 2024 09:35:35 +0200 Subject: [PATCH 13/29] feat: Raise error on attempt to interpolate a MultiIndex data frame, providing a useful error message that describes a working alternative syntax. Fixed related test cases and added test that makes sure the error is raised. --- pandas/core/resample.py | 34 ++++++--------- pandas/tests/resample/test_time_grouper.py | 50 ++++++++++++++-------- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 8939332dcaa72..63e239d40969c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -927,26 +927,16 @@ def interpolate( if not is_period_index: final_index = result.index if isinstance(final_index, MultiIndex): - # MultiIndex case: the `self._selected_obj` is the object before - # the groupby that led to this MultiIndex, so that the index - # is not directly available. We reconstruct it by obtaining the - # groupby columns from the final index, but assuming that the - # name of the datetime index is not included... - group_columns = list( - set(final_index.names).difference({obj.index.name}) + raise ValueError( + "Direct interpolation of MultiIndex data frames is not " + "supported. 
If you tried to resample and interpolate on a " + "grouped data frame, please use:\n" + "`df.groupby(...).apply(lambda x: x.resample(...)." + "interpolate(...), include_groups=False)`" + "\ninstead, as resampling and interpolation has to be " + "performed for each group independently." ) - # ... To obtain a DataFrame with the groupby columns and the - # datetime index, we need to reset the index and groupby again, - # then apply the (cheap) first-aggregator. - index_name = obj.index.name or "index" - obj = obj.reset_index().groupby(group_columns + [index_name]).first() - - # The value columns that became index levels have to be added - # back manually. This is not ideal performance-wise. - for column in group_columns: - obj[column] = obj.index.get_level_values(column) - missing_data_points_index = obj.index.difference(final_index) if len(missing_data_points_index) > 0: result = concat( @@ -964,13 +954,15 @@ def interpolate( **kwargs, ) - # We make sure that original data points which do not align with the - # resampled index are removed + # No further steps if the original data has a PeriodIndex if is_period_index: return result_interpolated + # Make sure that original data points which do not align with the + # resampled index are removed result_interpolated = result_interpolated.loc[final_index] - # We make sure frequency indexes are preserved + + # Make sure frequency indexes are preserved result_interpolated.index = final_index return result_interpolated diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 1b81e534b3f6a..36f005ea43839 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -340,7 +340,7 @@ def groupy_test_df(): return df.set_index("week_starting") -def test_groupby_resample_interpolate(groupy_test_df): +def test_groupby_resample_interpolate_raises(groupy_test_df): # GH 35325 # Make a copy of the test data frame that has index.name=None @@ 
-352,7 +352,28 @@ def test_groupby_resample_interpolate(groupy_test_df): for df in dfs: msg = "DataFrameGroupBy.resample operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): - result = df.groupby("volume").resample("1D").interpolate(method="linear") + with pytest.raises( + ValueError, + match="Direct interpolation of MultiIndex data frames is " + "not supported", + ): + df.groupby("volume").resample("1D").interpolate(method="linear") + + +def test_groupby_resample_interpolate_with_apply_syntax(groupy_test_df): + # GH 35325 + + # Make a copy of the test data frame that has index.name=None + groupy_test_df_without_index_name = groupy_test_df.copy() + groupy_test_df_without_index_name.index.name = None + + dfs = [groupy_test_df, groupy_test_df_without_index_name] + + for df in dfs: + result = df.groupby("volume").apply( + lambda x: x.resample("1d").interpolate(method="linear"), + include_groups=False, + ) volume = [50] * 15 + [60] week_starting = list(date_range("2018-01-07", "2018-01-21")) + [ @@ -382,25 +403,21 @@ def test_groupby_resample_interpolate(groupy_test_df): 9.071428571428571, 9.0, 11.0, - ], - "volume": [50.0] * 15 + [60], + ] }, index=expected_ind, ) tm.assert_frame_equal(result, expected) -def test_groupby_resample_interpolate_off_grid(groupy_test_df): - """Similar test as test_groupby_resample_interpolate but with resampling - that results in missing anchor points when interpolating. See GH#21351.""" +def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df): + """Similar test as test_groupby_resample_interpolate_with_apply_syntax but + with resampling that results in missing anchor points when interpolating. 
+ See GH#21351.""" # GH#21351 - msg = "DataFrameGroupBy.resample operated on the grouping columns" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - result = ( - groupy_test_df.groupby("volume") - .resample("265h") - .interpolate(method="linear") - ) + result = groupy_test_df.groupby("volume").apply( + lambda x: x.resample("265h").interpolate(method="linear"), include_groups=False + ) volume = [50, 50, 60] week_starting = [ @@ -417,10 +434,9 @@ def test_groupby_resample_interpolate_off_grid(groupy_test_df): data={ "price": [ 10.0, - 9.5, + 9.21131, 11.0, - ], - "volume": np.array(volume).astype(float), + ] }, index=expected_ind, ) From d6af64aea38b4bfe1c07b6a05790ea8a6e94a89f Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 4 Apr 2024 08:58:05 +0200 Subject: [PATCH 14/29] Apply suggestions from code review Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/missing.py | 2 +- pandas/core/resample.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 21d6da10c5940..039d868bccd16 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -323,7 +323,7 @@ def get_interp_index(method, index: Index) -> Index: elif not is_numeric_dtype(index.dtype): # We keep behavior consistent with prior versions of pandas for # non-numeric, non-datetime indexes - index = Index(np.arange(len(index))) + index = Index(range(len(index))) else: methods = {"index", "values", "nearest", "time"} is_numeric_or_datetime = ( diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 63e239d40969c..f8fe962b5537c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -927,7 +927,7 @@ def interpolate( if not is_period_index: final_index = result.index if isinstance(final_index, MultiIndex): - raise ValueError( + raise NotImplementedError( "Direct interpolation of MultiIndex data frames is not " "supported. 
If you tried to resample and interpolate on a " "grouped data frame, please use:\n" From 2a86a279398133ecd062c8fc980053615c5a8e76 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 4 Apr 2024 09:00:48 +0200 Subject: [PATCH 15/29] refactor: Adjusted error type assertion in test case --- pandas/tests/resample/test_time_grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 36f005ea43839..d390aafa9ebd6 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -353,7 +353,7 @@ def test_groupby_resample_interpolate_raises(groupy_test_df): msg = "DataFrameGroupBy.resample operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): with pytest.raises( - ValueError, + NotImplementedError, match="Direct interpolation of MultiIndex data frames is " "not supported", ): From 9c90e234a1458e27869b1fb637774f86e274bdd5 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 4 Apr 2024 09:05:17 +0200 Subject: [PATCH 16/29] refactor: Removed unused parametrization definitions and switched to direct parametrization for interpolation methods in tests. --- pandas/tests/resample/test_base.py | 60 ++++++++++++------------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 4d04bda955e6e..55061d7c0f051 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -24,41 +24,23 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import _asfreq_compat -# a fixture value can be overridden by the test parameter value. Note that the -# value of the fixture can be overridden this way even if the test doesn't use -# it directly (doesn't mention it in the function prototype). 
-# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501 -# in this module we override the fixture values defined in conftest.py -# tuples of '_index_factory,_series_name,_index_start,_index_end' -DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) -PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) -TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") - -all_ts = pytest.mark.parametrize( - "_index_factory,_series_name,_index_start,_index_end", - [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], -) - -all_1d_no_arg_interpolation_methods = pytest.mark.parametrize( - "method", - [ - "linear", - "time", - "index", - "values", - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "barycentric", - "krogh", - "from_derivatives", - "piecewise_polynomial", - "pchip", - "akima", - ], -) +ALL_1D_NO_ARG_INTERPOLATION_METHODS = [ + "linear", + "time", + "index", + "values", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "krogh", + "from_derivatives", + "piecewise_polynomial", + "pchip", + "akima", +] @pytest.fixture @@ -136,7 +118,9 @@ def test_resample_interpolate(index): tm.assert_frame_equal(result, expected) -@all_1d_no_arg_interpolation_methods +pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) + + def test_resample_interpolate_regular_sampling_off_grid(method): pytest.importorskip("scipy") # GH#21351 @@ -159,7 +143,9 @@ def test_resample_interpolate_regular_sampling_off_grid(method): ) -@all_1d_no_arg_interpolation_methods +pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) + + def test_resample_interpolate_irregular_sampling(method): pytest.importorskip("scipy") # GH#21351 From 4b2f3dc4f80b87fe7fc58e67f58c8a085a31f6fe Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 4 Apr 2024 09:54:25 +0200 Subject: [PATCH 17/29] fix: Adds forgotten "@" before 
pytest.mark.parametrize --- pandas/tests/resample/test_base.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 55061d7c0f051..7e6ec16568751 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -118,9 +118,7 @@ def test_resample_interpolate(index): tm.assert_frame_equal(result, expected) -pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) - - +@pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) def test_resample_interpolate_regular_sampling_off_grid(method): pytest.importorskip("scipy") # GH#21351 @@ -143,9 +141,7 @@ def test_resample_interpolate_regular_sampling_off_grid(method): ) -pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) - - +@pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) def test_resample_interpolate_irregular_sampling(method): pytest.importorskip("scipy") # GH#21351 From 789c5118468b1c8e0bdabbb2f61dbf35a7901c28 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 4 Apr 2024 19:21:10 +0200 Subject: [PATCH 18/29] refactor: Apply suggestions from code review --- pandas/tests/resample/test_base.py | 94 +++++++++++++++--------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 7e6ec16568751..94680ea5752cf 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -24,32 +24,26 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import _asfreq_compat -ALL_1D_NO_ARG_INTERPOLATION_METHODS = [ - "linear", - "time", - "index", - "values", - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "barycentric", - "krogh", - "from_derivatives", - "piecewise_polynomial", - "pchip", - "akima", -] - - -@pytest.fixture -def create_index(_index_factory): - def 
_create_index(*args, **kwargs): - """return the _index_factory created using the args, kwargs""" - return _index_factory(*args, **kwargs) - - return _create_index + +@pytest.fixture(scope="module") +def all_1d_no_arg_interpolation_methods(): + return [ + "linear", + "time", + "index", + "values", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "krogh", + "from_derivatives", + "piecewise_polynomial", + "pchip", + "akima", + ] @pytest.mark.parametrize("freq", ["2D", "1h"]) @@ -118,31 +112,34 @@ def test_resample_interpolate(index): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) -def test_resample_interpolate_regular_sampling_off_grid(method): +def test_resample_interpolate_regular_sampling_off_grid( + all_1d_no_arg_interpolation_methods, +): pytest.importorskip("scipy") # GH#21351 index = date_range("2000-01-01 00:01:00", periods=5, freq="2h") ser = Series(np.arange(5.0), index) - # Resample to 1 hour sampling and interpolate with the given method - ser_resampled = ser.resample("1h").interpolate(method) + for method in all_1d_no_arg_interpolation_methods: + # Resample to 1 hour sampling and interpolate with the given method + ser_resampled = ser.resample("1h").interpolate(method) - # Check that none of the resampled values are NaN, except the first one - # which lies 1 minute before the first actual data point - assert np.isnan(ser_resampled.iloc[0]) - assert not ser_resampled.iloc[1:].isna().any() + # Check that none of the resampled values are NaN, except the first one + # which lies 1 minute before the first actual data point + assert np.isnan(ser_resampled.iloc[0]) + assert not ser_resampled.iloc[1:].isna().any() - if method not in ["nearest", "zero"]: - # Check that the resampled values are close to the expected values - # except for methods with known inaccuracies - assert np.all( - np.isclose(ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1) - ) + 
if method not in ["nearest", "zero"]: + # Check that the resampled values are close to the expected values + # except for methods with known inaccuracies + assert np.all( + np.isclose( + ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1 + ) + ) -@pytest.mark.parametrize("method", ALL_1D_NO_ARG_INTERPOLATION_METHODS) -def test_resample_interpolate_irregular_sampling(method): +def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods): pytest.importorskip("scipy") # GH#21351 ser = Series( @@ -158,13 +155,14 @@ def test_resample_interpolate_irregular_sampling(method): ), ) - # Resample to 5 second sampling and interpolate with the given method - ser_resampled = ser.resample("5s").interpolate(method) + for method in all_1d_no_arg_interpolation_methods: + # Resample to 5 second sampling and interpolate with the given method + ser_resampled = ser.resample("5s").interpolate(method) - # Check that none of the resampled values are NaN, except the first one - # which lies 3 seconds before the first actual data point - assert np.isnan(ser_resampled.iloc[0]) - assert not ser_resampled.iloc[1:].isna().any() + # Check that none of the resampled values are NaN, except the first one + # which lies 3 seconds before the first actual data point + assert np.isnan(ser_resampled.iloc[0]) + assert not ser_resampled.iloc[1:].isna().any() def test_raises_on_non_datetimelike_index(): From 4f6d102d7d1266f851b4e475ff83bcb9ccca7428 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Thu, 4 Apr 2024 19:59:39 +0200 Subject: [PATCH 19/29] refactor: Switched to fixture params syntax for test case parametrization --- pandas/tests/resample/test_base.py | 52 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 94680ea5752cf..00621b74d02d1 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -25,9 +25,9 @@
from pandas.core.resample import _asfreq_compat -@pytest.fixture(scope="module") -def all_1d_no_arg_interpolation_methods(): - return [ +@pytest.fixture( + scope="session", + params=[ "linear", "time", "index", @@ -43,7 +43,10 @@ def all_1d_no_arg_interpolation_methods(): "piecewise_polynomial", "pchip", "akima", - ] + ], +) +def all_1d_no_arg_interpolation_methods(request): + return request.param @pytest.mark.parametrize("freq", ["2D", "1h"]) @@ -120,23 +123,21 @@ def test_resample_interpolate_regular_sampling_off_grid( index = date_range("2000-01-01 00:01:00", periods=5, freq="2h") ser = Series(np.arange(5.0), index) - for method in all_1d_no_arg_interpolation_methods: - # Resample to 1 hour sampling and interpolate with the given method - ser_resampled = ser.resample("1h").interpolate(method) + method = all_1d_no_arg_interpolation_methods + # Resample to 1 hour sampling and interpolate with the given method + ser_resampled = ser.resample("1h").interpolate(method) - # Check that none of the resampled values are NaN, except the first one - # which lies 1 minute before the first actual data point - assert np.isnan(ser_resampled.iloc[0]) - assert not ser_resampled.iloc[1:].isna().any() + # Check that none of the resampled values are NaN, except the first one + # which lies 1 minute before the first actual data point + assert np.isnan(ser_resampled.iloc[0]) + assert not ser_resampled.iloc[1:].isna().any() - if method not in ["nearest", "zero"]: - # Check that the resampled values are close to the expected values - # except for methods with known inaccuracies - assert np.all( - np.isclose( - ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1 - ) - ) + if method not in ["nearest", "zero"]: + # Check that the resampled values are close to the expected values + # except for methods with known inaccuracies + assert np.all( + np.isclose(ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1) + ) def 
test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods): @@ -155,14 +156,13 @@ def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_met ), ) - for method in all_1d_no_arg_interpolation_methods: - # Resample to 5 second sampling and interpolate with the given method - ser_resampled = ser.resample("5s").interpolate(method) + # Resample to 5 second sampling and interpolate with the given method + ser_resampled = ser.resample("5s").interpolate(all_1d_no_arg_interpolation_methods) - # Check that none of the resampled values are NaN, except the first one - # which lies 3 seconds before the first actual data point - assert np.isnan(ser_resampled.iloc[0]) - assert not ser_resampled.iloc[1:].isna().any() + # Check that none of the resampled values are NaN, except the first one + # which lies 3 seconds before the first actual data point + assert np.isnan(ser_resampled.iloc[0]) + assert not ser_resampled.iloc[1:].isna().any() def test_raises_on_non_datetimelike_index(): From 4e9a6161897f9ed115448470d46c0fe71a06a526 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Sat, 13 Apr 2024 11:07:14 +0200 Subject: [PATCH 20/29] Update pandas/tests/resample/test_time_grouper.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/resample/test_time_grouper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index d390aafa9ebd6..c40f1d378e115 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -335,9 +335,7 @@ def test_upsample_sum(method, method_args, expected_values): @pytest.fixture def groupy_test_df(): - df = DataFrame({"price": [10, 11, 9], "volume": [50, 60, 50]}) - df["week_starting"] = date_range("01/01/2018", periods=3, freq="W") - return df.set_index("week_starting") + return DataFrame({"price": [10, 11, 9], "volume": [50, 
60, 50]}, index=date_range("01/01/2018", periods=3, freq="W")) def test_groupby_resample_interpolate_raises(groupy_test_df): From c655bf167f64b44f85d65dfc752abcbbf6ffae4e Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Sat, 13 Apr 2024 11:07:42 +0200 Subject: [PATCH 21/29] Update pandas/tests/resample/test_base.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/resample/test_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 00621b74d02d1..3428abacd509e 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -26,7 +26,6 @@ @pytest.fixture( - scope="session", params=[ "linear", "time", From eaa7e079402c12494df40eeb308e515b20dff9ce Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Sun, 14 Apr 2024 18:17:13 +0200 Subject: [PATCH 22/29] refactor: Fixes too long line --- pandas/tests/resample/test_time_grouper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index c40f1d378e115..d183f89b30fd0 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -335,7 +335,10 @@ def test_upsample_sum(method, method_args, expected_values): @pytest.fixture def groupy_test_df(): - return DataFrame({"price": [10, 11, 9], "volume": [50, 60, 50]}, index=date_range("01/01/2018", periods=3, freq="W")) + return DataFrame( + {"price": [10, 11, 9], "volume": [50, 60, 50]}, + index=date_range("01/01/2018", periods=3, freq="W"), + ) def test_groupby_resample_interpolate_raises(groupy_test_df): From 649bfa27a3fb52719e55806b44c7f41a397ba53b Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Sun, 14 Apr 2024 18:37:07 +0200 Subject: [PATCH 23/29] tests: Fixes test that fails due to unimportant index name comparison --- pandas/tests/resample/test_time_grouper.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index d183f89b30fd0..5f5a54c4d92a3 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -441,4 +441,4 @@ def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df) }, index=expected_ind, ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_names=False) From 4cfbbf1237d8d3d734d5b90978d8a1500d91ded8 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 24 Apr 2024 13:22:17 +0200 Subject: [PATCH 24/29] docs: Added entry in whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 50643454bbcec..f9ba25ef10427 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -455,6 +455,7 @@ Other - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) +- Bug in :meth:`DataFrame.resample(...).interpolate` on a DataFrame with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. 
``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`) From 76794e32ad8fa36295f38e59345f3ad92cf41e7f Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 24 Apr 2024 13:25:09 +0200 Subject: [PATCH 25/29] Empty-Commit From 65551419e9b34b712c819b1e29dc4979152e67bc Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 24 Apr 2024 13:38:33 +0200 Subject: [PATCH 26/29] Empty-Commit From 48850cc6c26620dff1845369752e196f8d5181f8 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 24 Apr 2024 18:08:06 +0200 Subject: [PATCH 27/29] Empty-Commit From 7f957cf16a4d81205eafc037428c9cf233157be4 Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 24 Apr 2024 19:00:08 +0200 Subject: [PATCH 28/29] docs: Sorted whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ebfc28e65d12f..d5e070ea857d5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -461,9 +461,9 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) -- Bug in :meth:`DataFrame.resample(...).interpolate` on a DataFrame with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. 
(:issue:`55091`) +- Bug in :meth:`DataFrame.resample(...).interpolate` on a DataFrame with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) From 12bdd908e32fbc531c9a1dc0de990f9ea40ba8ca Mon Sep 17 00:00:00 2001 From: Carlo Barth Date: Wed, 24 Apr 2024 19:14:29 +0200 Subject: [PATCH 29/29] docs: Adjusted bug fix note and moved it to the right section --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 093dad808eb32..4f55bd0d5e7ad 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -437,6 +437,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`) - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. 
(:issue:`57926`) - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) +- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) @@ -466,7 +467,6 @@ Other - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) -- Bug in :meth:`DataFrame.resample(...).interpolate` on a DataFrame with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)