Skip to content

Commit

Permalink
fix(rust, python): don't preserve sortedness in offset_by for tz-aware…
Browse files Browse the repository at this point in the history
… non-constant durations (#9818)
  • Loading branch information
MarcoGorelli authored Jul 12, 2023
1 parent 4e70557 commit 9d2d689
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 3 deletions.
17 changes: 14 additions & 3 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ use super::*;

#[cfg(feature = "date_offset")]
pub(super) fn date_offset(s: Series, offset: Duration) -> PolarsResult<Series> {
let preserve_sortedness: bool;
let out = match s.dtype().clone() {
DataType::Date => {
let s = s
.cast(&DataType::Datetime(TimeUnit::Milliseconds, None))
.unwrap();
preserve_sortedness = true;
date_offset(s, offset).and_then(|s| s.cast(&DataType::Date))
}
DataType::Datetime(tu, tz) => {
Expand All @@ -37,16 +39,25 @@ pub(super) fn date_offset(s: Series, offset: Duration) -> PolarsResult<Series> {
ca.0.try_apply(|v| offset_fn(&offset, v, None))
}
}?;
// Sortedness may not be preserved when crossing daylight savings time boundaries
// for calendar-aware durations.
// Constant durations (e.g. 2 hours) always preserve sortedness.
preserve_sortedness =
tz.is_none() || tz.as_deref() == Some("UTC") || offset.is_constant_duration();
out.cast(&DataType::Datetime(tu, tz))
}
dt => polars_bail!(
ComputeError: "cannot use 'date_offset' on Series of datatype {}", dt,
),
};
out.map(|mut out| {
out.set_sorted_flag(s.is_sorted_flag());
if preserve_sortedness {
out.map(|mut out| {
out.set_sorted_flag(s.is_sorted_flag());
out
})
} else {
out
})
}
}

pub(super) fn combine(s: &[Series], tu: TimeUnit) -> PolarsResult<Series> {
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-time/src/windows/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ impl Duration {
self.days
}

/// Reports whether this `Duration` has no calendar-based component.
///
/// Returns `true` only when the `months`, `weeks`, and `days` fields are all
/// zero; any other field of the duration is not consulted.
pub fn is_constant_duration(&self) -> bool {
    // Match all three calendar fields at once instead of chaining `&&`.
    matches!((self.months, self.weeks, self.days), (0, 0, 0))
}

/// Returns the nanoseconds from the `Duration` without the weeks or months part.
pub fn nanoseconds(&self) -> i64 {
self.nsecs
Expand Down
26 changes: 26 additions & 0 deletions py-polars/tests/unit/namespaces/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,32 @@ def test_local_time_sortedness(time_zone: str | None) -> None:
assert result.flags["SORTED_DESC"] is False


@pytest.mark.parametrize(
    ("time_zone", "offset", "expected"),
    [
        # calendar-based offset: one day
        (None, "1d", True),
        ("Asia/Kathmandu", "1d", False),
        ("UTC", "1d", True),
        # calendar-based offset: one month
        (None, "1mo", True),
        ("Asia/Kathmandu", "1mo", False),
        ("UTC", "1mo", True),
        # calendar-based offset: one week
        (None, "1w", True),
        ("Asia/Kathmandu", "1w", False),
        ("UTC", "1w", True),
        # fixed-length offset: one hour
        (None, "1h", True),
        ("Asia/Kathmandu", "1h", True),
        ("UTC", "1h", True),
    ],
)
def test_offset_by_sortedness(
    time_zone: str | None, offset: str, expected: bool
) -> None:
    """Check the sortedness flags of a sorted series after ``dt.offset_by``.

    For each ``(time_zone, offset)`` pair, the ascending-sorted flag of the
    result must equal ``expected`` and the descending-sorted flag must be
    ``False``.
    """
    naive = pl.Series([datetime(2022, 1, 1, 23)])
    sorted_input = naive.dt.replace_time_zone(time_zone).sort()
    shifted_flags = sorted_input.dt.offset_by(offset).flags
    assert shifted_flags["SORTED_ASC"] == expected
    assert shifted_flags["SORTED_DESC"] is False


def test_dt_datetime_date_time_invalid() -> None:
with pytest.raises(ComputeError, match="expected Datetime"):
pl.Series([date(2021, 1, 2)]).dt.datetime()
Expand Down

0 comments on commit 9d2d689

Please sign in to comment.