From 62235175633690aaaa740ef2d6c2c1099623fe77 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:02:01 -0700 Subject: [PATCH] fix(postprocessing): resample with holes (#27487) --- .../utils/pandas_postprocessing/resample.py | 5 +- .../pandas_postprocessing/test_resample.py | 54 ++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/superset/utils/pandas_postprocessing/resample.py b/superset/utils/pandas_postprocessing/resample.py index a82d7031e9c12..a689895bd6283 100644 --- a/superset/utils/pandas_postprocessing/resample.py +++ b/superset/utils/pandas_postprocessing/resample.py @@ -43,13 +43,16 @@ def resample( raise InvalidPostProcessingError(_("Resample operation requires DatetimeIndex")) if method not in RESAMPLE_METHOD: raise InvalidPostProcessingError( - _("Resample method should in ") + ", ".join(RESAMPLE_METHOD) + "." + _("Resample method should be in ") + ", ".join(RESAMPLE_METHOD) + "." ) if method == "asfreq" and fill_value is not None: _df = df.resample(rule).asfreq(fill_value=fill_value) + _df = _df.fillna(fill_value) elif method == "linear": _df = df.resample(rule).interpolate() else: _df = getattr(df.resample(rule), method)() + if method in ("ffill", "bfill"): + _df = _df.fillna(method=method) return _df diff --git a/tests/unit_tests/pandas_postprocessing/test_resample.py b/tests/unit_tests/pandas_postprocessing/test_resample.py index b1414c5fe8fdc..207863ab87946 100644 --- a/tests/unit_tests/pandas_postprocessing/test_resample.py +++ b/tests/unit_tests/pandas_postprocessing/test_resample.py @@ -21,7 +21,11 @@ from superset.exceptions import InvalidPostProcessingError from superset.utils import pandas_postprocessing as pp -from tests.unit_tests.fixtures.dataframes import categories_df, timeseries_df +from tests.unit_tests.fixtures.dataframes import ( + categories_df, + timeseries_df, + timeseries_with_gap_df, +) def test_resample_should_not_side_effect(): @@ -63,6 +67,29 @@ def test_resample(): ) +def test_resample_ffill_with_gaps(): + post_df = pp.resample(df=timeseries_with_gap_df, rule="1D", method="ffill") + assert post_df.equals( + pd.DataFrame( + index=pd.to_datetime( + [ + "2019-01-01", + "2019-01-02", + "2019-01-03", + "2019-01-04", + "2019-01-05", + "2019-01-06", + "2019-01-07", + ] + ), + data={ + "label": ["x", "y", "y", "y", "z", "z", "q"], + "y": [1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0], + }, + ) + ) + + def test_resample_zero_fill(): post_df = pp.resample(df=timeseries_df, rule="1D", method="asfreq", fill_value=0) assert post_df.equals( @@ -86,6 +113,31 @@ def test_resample_zero_fill(): ) +def test_resample_zero_fill_with_gaps(): + post_df = pp.resample( + df=timeseries_with_gap_df, rule="1D", method="asfreq", fill_value=0 + ) + assert post_df.equals( + pd.DataFrame( + index=pd.to_datetime( + [ + "2019-01-01", + "2019-01-02", + "2019-01-03", + "2019-01-04", + "2019-01-05", + "2019-01-06", + "2019-01-07", + ] + ), + data={ + "label": ["x", "y", 0, 0, "z", 0, "q"], + "y": [1.0, 2.0, 0, 0, 0, 0, 4.0], + }, + ) + ) + + def test_resample_after_pivot(): df = pd.DataFrame( data={