Resample beliefs about instantaneous sensors #118

Merged · 30 commits · Nov 22, 2022
Changes shown from 8 commits.

Commits (30)
55341e8
Support downsampling instantaneous sensors
Flix6x Oct 21, 2022
7f13512
More robust implementation of resampling instantaneous sensor data, t…
Flix6x Oct 31, 2022
53a78dc
Add tests
Flix6x Oct 31, 2022
4f5de41
Add NotImplementedError case
Flix6x Oct 31, 2022
13f1fbc
Simplify util function
Flix6x Oct 31, 2022
66ee209
Expand docstring
Flix6x Oct 31, 2022
a904765
isort
Flix6x Oct 31, 2022
f3feabc
Fix test
Flix6x Oct 31, 2022
167f007
Add two regular (non-DST-transition) resampling cases
Flix6x Nov 9, 2022
3537784
Move index reset dance to within util function
Flix6x Nov 9, 2022
707ea63
Remove upsampling example in a test for downsampling (upsampling shou…
Flix6x Nov 9, 2022
e262dd8
Remove redundant call to drop_duplicates
Flix6x Nov 9, 2022
15f6429
Merge remote-tracking branch 'origin/main' into resample-beliefs-abou…
Flix6x Nov 10, 2022
d1bbb5b
typo
Flix6x Nov 13, 2022
cc00933
Clarify inline comment
Flix6x Nov 13, 2022
9330d16
Add inline explanation about handling DST transitions
Flix6x Nov 17, 2022
b951c78
Revert "Remove upsampling example in a test for downsampling (upsampl…
Flix6x Nov 17, 2022
7bf2217
Add new property: event_frequency
Flix6x Nov 21, 2022
5395a6d
Rename function to resample instantaneous events, rewrite its logic b…
Flix6x Nov 21, 2022
3bd3d35
Add a lot more test cases for resampling instantaneous sensor data
Flix6x Nov 21, 2022
1d98721
Test resolution and frequency for resample instantaneous BeliefsDataF…
Flix6x Nov 21, 2022
b3a70b9
Expand test for resampling instantaneous BeliefsDataFrame with 'first…
Flix6x Nov 21, 2022
bb703f9
clarifications
Flix6x Nov 21, 2022
5621506
Do not cast floats to ints (loss of information)
Flix6x Nov 21, 2022
9dec4da
Restrict resampling to BeliefsDataFrames with 1 row per event
Flix6x Nov 21, 2022
ee00e40
isort and flake8
Flix6x Nov 21, 2022
23353ea
Merge branch 'main' into resample-beliefs-about-instantaneous-sensors
Flix6x Nov 21, 2022
ed18518
Rename test
Flix6x Nov 22, 2022
f943d06
Clarify docstring of function parameters
Flix6x Nov 22, 2022
a4fca7e
Expand resample_events docstring
Flix6x Nov 22, 2022
9 changes: 9 additions & 0 deletions timely_beliefs/beliefs/classes.py
@@ -1347,6 +1347,15 @@ def resample_events(
return self
df = self

# Resample instantaneous sensors
Contributor commented:

I can't tell from the code why your new function only applies to instantaneous sensors. Can you add an explanation of that?

And why is that the one case in which we take so much care over DST transitions? (This resample_events function has two other cases.)

Note: the NB about keep_only_most_recent_belief=True applies to only one case; the reader might assume it applies to all of them.

# The event resolution stays zero, but the data frequency updates
if df.event_resolution == timedelta(0):
index_names = df.index.names
df = df.reset_index().set_index("event_start")
df = belief_utils.downsample_first(df, event_resolution)
df = df.reset_index().set_index(index_names)
return df

belief_timing_col = (
"belief_time" if "belief_time" in df.index.names else "belief_horizon"
)
22 changes: 21 additions & 1 deletion timely_beliefs/beliefs/utils.py
@@ -283,7 +283,9 @@ def join_beliefs(
if output_resolution > input_resolution:

# Create new BeliefsDataFrame with downsampled event_start
if output_resolution % input_resolution != timedelta(0):
if input_resolution == timedelta(
0
) or output_resolution % input_resolution != timedelta(0):
raise NotImplementedError(
"Cannot downsample from resolution %s to %s."
% (input_resolution, output_resolution)
@@ -749,3 +751,21 @@ def extreme_timedeltas_not_equal(
if isinstance(td_a, pd.Timedelta):
td_a = td_a.to_pytimedelta()
return td_a != td_b


def downsample_first(df: pd.DataFrame, resolution: timedelta) -> pd.DataFrame:
Contributor commented:

This function name was not helpful to me at all, and the comment also didn't help much.

Is this downsampling that opts for the first event in non-obvious cases?

Also, handling DST correctly is so crucial that we could even consider including it in the name.

Collaborator Author (Flix6x) replied:

Thanks for raising this issue. For me it boils down to a more fundamental question: does resampling affect the data frequency or the event resolution? Hopefully these examples illustrate the question:

Let's say there is a sensor recording the average wind speed over a period of 3 seconds. That is, its event resolution is 3 seconds. And let's say a record is made every second, so we get a sliding window of measurements. That is, its data frequency is 1 second. (To be explicit, I'm following pandas terminology here, which measures frequency in units of time, say, s rather than 1/s.)

Now we want to resample and save the results to a new sensor that records the maximum 3-second wind gust in a 10 minute period, for every sequential period of 10 minutes (this is actually a useful measure, e.g. reported by KNMI). That is, the new sensor has an event resolution of 10 minutes, and also a data frequency of 10 minutes.

This resampling operation is downsampling, with both the event resolution and the data frequency being downsampled to 10 minutes. Something like .resample(pd.Timedelta("PT10M")).max() should do the trick to transform the data, and the event resolution would be updated to 10 minutes.
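
In plain pandas, that operation could look roughly like this (a minimal sketch with synthetic wind data, just for illustration; the variable names are made up):

```python
import numpy as np
import pandas as pd

# Hypothetical 3-second average wind speed, recorded every second:
# event resolution = 3 s, data frequency = 1 s.
index = pd.date_range("2022-06-01 12:00", periods=3600, freq="1s")
wind_3s = pd.Series(
    np.random.default_rng(0).gamma(2.0, 3.0, size=len(index)), index=index
)

# Maximum 3-second gust per 10-minute period: both the event resolution
# and the data frequency become 10 minutes.
gust_10min = wind_3s.resample(pd.Timedelta("PT10M")).max()
```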

Now consider a sensor recording instantaneous temperature readings, once per hour. That is, its event resolution is 0 hours and its data frequency is 1 hour. What would it mean to resample to 30 minutes? I see two possibilities:

  1. We measure instantaneous temperature every 30 minutes. The event resolution stays 0 hours and the data frequency becomes 30 minutes. Something like .resample(pd.Timedelta("PT30M")).ffill() (or .interpolate()) would do the trick to transform the data. The event resolution would not change. We are upsampling (with regards to the data frequency).
  2. We measure average temperatures over a period of 30 minutes. Some combination of .resample().interpolate().rolling().mean() should do the trick to transform the data. The event resolution would be updated to 30 minutes. We are upsampling (with regards to the data frequency) and downsampling (with regards to the event resolution).

For example, given temperature readings:

10 °C at 1 PM
12 °C at 2 PM

With linear interpolation, option 1 yields:

10 °C at 1 PM
11 °C at 1.30 PM
12 °C at 2 PM

With linear interpolation, option 2 yields:

10.5 °C average between 1 PM and 1.30 PM
11.5 °C average between 1.30 PM and 2 PM
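
In plain pandas, both options could be sketched roughly like this (the date is arbitrary; option 2 comes out exact here only because the interpolated signal is piecewise linear):

```python
import pandas as pd

# Hourly instantaneous temperature readings (°C) from the example above.
s = pd.Series(
    [10.0, 12.0],
    index=pd.to_datetime(["2000-01-01 13:00", "2000-01-01 14:00"]),
)

# Option 1: keep the zero event resolution, only increase the data frequency.
option_1 = s.resample("30min").interpolate()
# 13:00 -> 10.0, 13:30 -> 11.0, 14:00 -> 12.0

# Option 2: 30-minute averages. Because the interpolated signal is piecewise
# linear, the average over each period equals the mean of its two endpoints,
# so a rolling mean over the upsampled series does the trick.
option_2 = option_1.rolling(2).mean().shift(-1).dropna()
# 13:00 -> 10.5 (average over 13:00-13:30), 13:30 -> 11.5 (average over 13:30-14:00)
```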

For now, I'm gravitating towards a policy like "in timely-beliefs, event resampling, by default, updates both the data frequency and the event resolution for non-instantaneous sensors, and only the data frequency for instantaneous sensors." But I'd like to support updating the event resolution for instantaneous sensors, too, maybe only when explicitly requested. For example, consider instantaneous battery SoC measurements every 5 minutes. When resampled to a day, one might not really be interested in the SoC at midnight of every day (.first(), and keep the original zero event resolution), but rather in something like the daily min or max SoC (.min()/.max(), and update the event resolution to 1 day).
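
Roughly, with synthetic SoC data (a sketch, not the timely-beliefs API):

```python
import numpy as np
import pandas as pd

# Hypothetical instantaneous battery SoC readings (kWh), one every 5 minutes for 3 days.
index = pd.date_range("2022-06-01", periods=3 * 288, freq="5min", tz="utc")
soc = pd.Series(50 + 30 * np.sin(np.linspace(0, 6 * np.pi, len(index))), index=index)

# Keep the zero event resolution: just the SoC reading at midnight of each day.
soc_at_midnight = soc.resample("1D").first()

# Update the event resolution to 1 day: the daily maximum (or minimum) SoC.
daily_max_soc = soc.resample("1D").max()
daily_min_soc = soc.resample("1D").min()
```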

Contributor (@nhoening, Nov 15, 2022) commented:

I understand the distinction now (between the data frequency and the event resolution).

W.r.t. your last paragraph,

  1. I can imagine resampling policies, which could be chosen by way of a general configuration setting or even a per-sensor attribute. You named two policies here; hopefully we can keep it that simple for now. And you know which default you'd prefer.
  2. The text you've written can probably enter the timely-beliefs documentation, so everybody can learn about our thoughts here.

Finally, you haven't suggested a better function name yet.

Contributor replied:

resample_to_frequency or resample_frequency?

Collaborator Author (Flix6x) replied:

I renamed the function.

> "I can imagine resampling policies"

I now made the util function accept different resampling methods, and restricted its use in resample_events to BeliefsDataFrames containing as many events as data points. Solving the general case is a lot harder, but this should already support most use cases.

> "The text you've written can probably enter the timely-beliefs documentation, so everybody can learn about our thoughts here."

I opened #123 for this.

"""Resample data representing instantaneous events.
Contributor commented:

This assumption (instantaneous events) is never checked, nor is it checked that the index is even a DatetimeIndex.

Contributor replied:

Can you reply to this?

Collaborator Author (Flix6x) replied:

I'm leaning more towards making this a private function. Would that take away your concern?

Contributor replied:

Not quite. If it matters so much, why not add an assert statement? I'm guessing you wouldn't be sure what the outcomes would be if df wasn't instantaneous?

Collaborator Author (Flix6x) replied:

I opened #124 as a follow-up.


Updates the data frequency, while keeping the event resolution.

Note that the data frequency may not be constant due to DST transitions.
The duration between observations is longer for the fall DST transition,
and shorter for the spring DST transition.
"""
ds_index = df.index.floor(
resolution, ambiguous=[True] * len(df), nonexistent="shift_forward"
).drop_duplicates()
ds_df = df[df.index.isin(df.index.join(ds_index, how="inner"))]
if ds_df.index.freq is None and len(ds_df) > 2:
ds_df.index.freq = pd.infer_freq(ds_df.index)
return ds_df
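
For illustration, a quick usage sketch of downsample_first around the spring DST transition, mirroring the first parametrized case of test_downsample_first added below:

```python
import pandas as pd

from timely_beliefs.beliefs.utils import downsample_first

# Hourly instantaneous events (values 1..7) spanning the spring DST transition.
index = pd.date_range("2022-03-27 01:00+01", periods=7, freq="1H").tz_convert(
    "Europe/Amsterdam"
)
df = pd.DataFrame(list(range(1, 8)), index=index)

ds_df = downsample_first(df, pd.Timedelta("PT2H"))
print(ds_df.values.flatten().tolist())  # [2, 3, 5, 7], as asserted in the test below
```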
56 changes: 55 additions & 1 deletion timely_beliefs/tests/test_belief_utils.py
@@ -1,8 +1,10 @@
import pandas as pd
import pytest

from timely_beliefs.beliefs.probabilistic_utils import get_median_belief
from timely_beliefs.beliefs.utils import propagate_beliefs
from timely_beliefs.beliefs.utils import downsample_first, propagate_beliefs
from timely_beliefs.examples import get_example_df
from timely_beliefs.tests.utils import equal_lists


def test_propagate_multi_sourced_deterministic_beliefs():
@@ -31,3 +33,55 @@ def test_propagate_multi_sourced_deterministic_beliefs():
== pd.Timestamp("2000-01-01 01:00:00+00:00")
].droplevel("belief_time"),
)


@pytest.mark.parametrize(
("start", "periods", "resolution", "exp_event_values"),
[
(
"2022-03-27 01:00+01",
7,
"PT2H",
[2, 3, 5, 7],
), # DST transition from +01 to +02 (spring forward, contracted event)
(
"2022-10-30 01:00+02",
7,
"PT2H",
[2, 5, 7],
), # DST transition from +02 to +01 (fall back -> extended event)
(
"2022-03-26 01:00+01",
23 + 23 + 23,
"PT24H",
[24, 47],
), # midnight of 1 full (contracted) day, plus the following midnight of 1 partial day
(
"2022-03-26 01:00+01",
23 + 23 + 23,
"P1D",
[24, 47],
), # midnight of 1 full (contracted) day, plus the following midnight of 1 partial day
(
"2022-10-29 01:00+02",
23 + 25 + 23,
"PT24H",
[24, 49],
), # midnight of 1 full (extended) day, plus the following midnight of 1 partial day
(
"2022-10-29 01:00+02",
23 + 25 + 24 + 23,
"P1D",
[24, 49, 73],
), # midnight of 1 full (extended) day and 1 full (regular) day, plus the following midnight of 1 partial day
],
)
def test_downsample_first(start, periods, resolution, exp_event_values):
"""Enumerate the events and check whether downsampling returns the expected events."""
index = pd.date_range(start, periods=periods, freq="1H").tz_convert(
"Europe/Amsterdam"
)
df = pd.DataFrame(list(range(1, periods + 1)), index=index)
ds_df = downsample_first(df, pd.Timedelta(resolution))
print(ds_df)
assert equal_lists(ds_df.values, exp_event_values)
65 changes: 55 additions & 10 deletions timely_beliefs/tests/test_df_resampling.py
@@ -15,26 +15,27 @@

@pytest.fixture(scope="function", autouse=True)
def df_wxyz(
time_slot_sensor: Sensor, test_source_a: BeliefSource, test_source_b: BeliefSource
) -> Callable[[int, int, int, int, Optional[datetime]], BeliefsDataFrame]:
test_source_a: BeliefSource, test_source_b: BeliefSource
) -> Callable[[Sensor, int, int, int, int, Optional[datetime]], BeliefsDataFrame]:
"""Convenient BeliefsDataFrame to run tests on.
For a single sensor, it contains w events, for each of which x beliefs by y sources each (max 2),
described by z probabilistic values (max 3).
Note that the event resolution of the sensor is 15 minutes.
"""

sources = [test_source_a, test_source_b] # expand to increase max y
cps = [0.1587, 0.5, 0.8413] # expand to increase max z

def f(w: int, x: int, y: int, z: int, start: Optional[datetime] = None):
def f(
sensor: Sensor, w: int, x: int, y: int, z: int, start: Optional[datetime] = None
):
if start is None:
start = datetime(2000, 1, 3, 9, tzinfo=pytz.utc)

# Build up a BeliefsDataFrame with various events, beliefs, sources and probabilistic accuracy (for a single sensor)
beliefs = [
TimedBelief(
source=sources[s],
sensor=time_slot_sensor,
sensor=sensor,
value=1000 * e + 100 * b + 10 * s + p,
belief_time=datetime(2000, 1, 1, tzinfo=pytz.utc) + timedelta(hours=b),
event_start=start + timedelta(hours=e),
@@ -45,7 +46,7 @@ def f(w: int, x: int, y: int, z: int, start: Optional[datetime] = None):
for s in range(y) # y sources
for p in range(z) # z cumulative probabilities
]
return BeliefsDataFrame(sensor=time_slot_sensor, beliefs=beliefs)
return BeliefsDataFrame(sensor=sensor, beliefs=beliefs)

return f

Expand All @@ -55,15 +56,35 @@ def df_4323(
time_slot_sensor: Sensor,
test_source_a: BeliefSource,
test_source_b: BeliefSource,
df_wxyz: Callable[[int, int, int, int, Optional[datetime]], BeliefsDataFrame],
df_wxyz: Callable[
[Sensor, int, int, int, int, Optional[datetime]], BeliefsDataFrame
],
) -> BeliefsDataFrame:
"""Convenient BeliefsDataFrame to run tests on.
For a single sensor, it contains 4 events, for each of which 3 beliefs by 2 sources each, described by 3
probabilistic values.
Note that the event resolution of the sensor is 15 minutes.
"""
start = pytz.timezone("utc").localize(datetime(2000, 1, 3, 9))
return df_wxyz(time_slot_sensor, 4, 3, 2, 3, start)


@pytest.fixture(scope="function", autouse=True)
def df_instantaneous_4323(
instantaneous_sensor: Sensor,
test_source_a: BeliefSource,
test_source_b: BeliefSource,
df_wxyz: Callable[
[Sensor, int, int, int, int, Optional[datetime]], BeliefsDataFrame
],
) -> BeliefsDataFrame:
"""Convenient BeliefsDataFrame to run tests on.
For a single sensor, it contains 4 events, for each of which 3 beliefs by 2 sources each, described by 3
probabilistic values.
Note that the event resolution of the sensor is 15 minutes.
"""
start = pytz.timezone("utc").localize(datetime(2000, 1, 3, 9))
return df_wxyz(4, 3, 2, 3, start)
return df_wxyz(instantaneous_sensor, 4, 3, 2, 3, start)


def test_replace_index_level_with_intersect(df_4323):
@@ -241,13 +262,18 @@ def test_percentages_and_accuracy_of_probabilistic_model(df_4323: BeliefsDataFra


def test_downsample_once_upsample_once_around_dst(
df_wxyz: Callable[[int, int, int, int, Optional[datetime]], BeliefsDataFrame]
time_slot_sensor: Sensor,
df_wxyz: Callable[
[Sensor, int, int, int, int, Optional[datetime]], BeliefsDataFrame
],
):
"""Fast track resampling is enabled because the data contains 1 deterministic belief per event and a unique belief time and source."""
downsampled_event_resolution = timedelta(hours=24)
upsampled_event_resolution = timedelta(minutes=10)
start = pytz.timezone("Europe/Amsterdam").localize(datetime(2020, 3, 29, 0))
df = df_wxyz(25, 1, 1, 1, start) # 1 deterministic belief per event
df = df_wxyz(
time_slot_sensor, 25, 1, 1, 1, start
) # 1 deterministic belief per event
df.iloc[0] = np.NaN # introduce 1 NaN value
print(df)

@@ -311,3 +337,22 @@ def test_groupby_preserves_metadata(df_4323: BeliefsDataFrame):
assert slice_0.sensor == df.sensor
df_2 = grouper.apply(lambda x: x.head(1))
assert df_2.sensor == df.sensor


def test_downsample_instantaneous(df_instantaneous_4323):
"""Check resampling instantaneous events from hourly readings to 2-hourly readings.

Given data for 9, 10, 11 and 12 o'clock, we expect to get out only data for 10 and 12 o'clock.
"""
pd.set_option("display.max_rows", None)
print(df_instantaneous_4323)
# Downsample the original frame
downsampled_event_resolution = timedelta(hours=2)
df_resampled_1 = df_instantaneous_4323.resample_events(downsampled_event_resolution)
print(df_resampled_1)
df_expected = df_instantaneous_4323[
df_instantaneous_4323.index.get_level_values("event_start").isin(
["2000-01-03 10:00:00+00:00", "2000-01-03 12:00:00+00:00"]
)
]
pd.testing.assert_frame_equal(df_resampled_1, df_expected)