Patch summary (free text ahead of the first `diff --git` header; patch tools skip it):

  * temporal.rs: `date_offset` copies the input's sorted flag onto the result only
    when `preserve_sortedness` is true. That is always the case for `Date` input,
    and for `Datetime` input when the time zone is `None` or "UTC", or when
    `offset.is_constant_duration()` holds.
  * duration.rs: adds `Duration::is_constant_duration`, which is true when the
    months, weeks and days components are all zero.
  * test_datetime.py: adds `test_offset_by_sortedness`, parametrized over time zone
    and offset, asserting the SORTED_ASC / SORTED_DESC flags after `dt.offset_by`.

diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/temporal.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/temporal.rs
index 9720c3041f74..6e695732a52f 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/temporal.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/temporal.rs
@@ -8,11 +8,13 @@ use super::*;
 
 #[cfg(feature = "date_offset")]
 pub(super) fn date_offset(s: Series, offset: Duration) -> PolarsResult<Series> {
+    let preserve_sortedness: bool;
     let out = match s.dtype().clone() {
         DataType::Date => {
             let s = s
                 .cast(&DataType::Datetime(TimeUnit::Milliseconds, None))
                 .unwrap();
+            preserve_sortedness = true;
             date_offset(s, offset).and_then(|s| s.cast(&DataType::Date))
         }
         DataType::Datetime(tu, tz) => {
@@ -37,16 +39,25 @@ pub(super) fn date_offset(s: Series, offset: Duration) -> PolarsResult<Series> {
                     ca.0.try_apply(|v| offset_fn(&offset, v, None))
                 }
             }?;
+            // Sortedness may not be preserved when crossing daylight savings time boundaries
+            // for calendar-aware durations.
+            // Constant durations (e.g. 2 hours) always preserve sortedness.
+            preserve_sortedness =
+                tz.is_none() || tz.as_deref() == Some("UTC") || offset.is_constant_duration();
             out.cast(&DataType::Datetime(tu, tz))
         }
         dt => polars_bail!(
             ComputeError: "cannot use 'date_offset' on Series of datatype {}", dt,
         ),
     };
-    out.map(|mut out| {
-        out.set_sorted_flag(s.is_sorted_flag());
+    if preserve_sortedness {
+        out.map(|mut out| {
+            out.set_sorted_flag(s.is_sorted_flag());
+            out
+        })
+    } else {
         out
-    })
+    }
 }
 
 pub(super) fn combine(s: &[Series], tu: TimeUnit) -> PolarsResult<Series> {
diff --git a/polars/polars-time/src/windows/duration.rs b/polars/polars-time/src/windows/duration.rs
index 2686c6bc73cb..e6c11e4701ae 100644
--- a/polars/polars-time/src/windows/duration.rs
+++ b/polars/polars-time/src/windows/duration.rs
@@ -338,6 +338,10 @@ impl Duration {
         self.days
     }
 
+    pub fn is_constant_duration(&self) -> bool {
+        self.months == 0 && self.weeks == 0 && self.days == 0
+    }
+
     /// Returns the nanoseconds from the `Duration` without the weeks or months part.
     pub fn nanoseconds(&self) -> i64 {
         self.nsecs
diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py
index ecb76458870d..fb528381da62 100644
--- a/py-polars/tests/unit/namespaces/test_datetime.py
+++ b/py-polars/tests/unit/namespaces/test_datetime.py
@@ -139,6 +139,32 @@ def test_local_time_sortedness(time_zone: str | None) -> None:
     assert result.flags["SORTED_DESC"] is False
 
 
+@pytest.mark.parametrize(
+    ("time_zone", "offset", "expected"),
+    [
+        (None, "1d", True),
+        ("Asia/Kathmandu", "1d", False),
+        ("UTC", "1d", True),
+        (None, "1mo", True),
+        ("Asia/Kathmandu", "1mo", False),
+        ("UTC", "1mo", True),
+        (None, "1w", True),
+        ("Asia/Kathmandu", "1w", False),
+        ("UTC", "1w", True),
+        (None, "1h", True),
+        ("Asia/Kathmandu", "1h", True),
+        ("UTC", "1h", True),
+    ],
+)
+def test_offset_by_sortedness(
+    time_zone: str | None, offset: str, expected: bool
+) -> None:
+    ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()
+    result = ser.dt.offset_by(offset)
+    assert result.flags["SORTED_ASC"] == expected
+    assert result.flags["SORTED_DESC"] is False
+
+
 def test_dt_datetime_date_time_invalid() -> None:
     with pytest.raises(ComputeError, match="expected Datetime"):
         pl.Series([date(2021, 1, 2)]).dt.datetime()