Skip to content

Commit

Permalink
feat(rust, python): add dt.dst_offset feature (pola-rs#9629)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored and c-peters committed Jul 14, 2023
1 parent 906e379 commit 40af4dd
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 2 deletions.
7 changes: 7 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,13 @@ impl DateLikeNameSpace {
.map_private(FunctionExpr::TemporalExpr(TemporalFunction::MonthEnd))
}

/// Get the additional offset from UTC currently in effect (usually due to daylight saving time).
///
/// Maps the wrapped expression through the `TemporalFunction::DSTOffset` function expression.
/// Only compiled when the `timezones` feature is enabled.
#[cfg(feature = "timezones")]
pub fn dst_offset(self) -> Expr {
    let function = FunctionExpr::TemporalExpr(TemporalFunction::DSTOffset);
    self.0.map_private(function)
}

pub fn round<S: AsRef<str>>(self, every: S, offset: S) -> Expr {
let every = every.as_ref().into();
let offset = offset.as_ref().into();
Expand Down
23 changes: 23 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#[cfg(feature = "timezones")]
use chrono_tz::Tz;
#[cfg(feature = "timezones")]
use polars_time::dst_offset as dst_offset_fn;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -32,6 +34,8 @@ pub enum TemporalFunction {
MonthStart,
#[cfg(feature = "date_offset")]
MonthEnd,
#[cfg(feature = "timezones")]
DSTOffset,
Round(String, String),
#[cfg(feature = "timezones")]
CastTimezone(Option<TimeZone>, Option<bool>),
Expand Down Expand Up @@ -77,6 +81,8 @@ impl Display for TemporalFunction {
MonthStart => "month_start",
#[cfg(feature = "date_offset")]
MonthEnd => "month_end",
#[cfg(feature = "timezones")]
DSTOffset => "dst_offset",
Round(..) => "round",
#[cfg(feature = "timezones")]
CastTimezone(_, _) => "replace_timezone",
Expand Down Expand Up @@ -254,6 +260,23 @@ pub(super) fn month_end(s: &Series) -> PolarsResult<Series> {
})
}

/// Compute the DST offset for every value of a time-zone-aware datetime `Series`.
///
/// # Errors
///
/// Returns an error when the dtype of `s` is anything other than a `Datetime`
/// carrying a time zone.
///
/// # Panics
///
/// Panics if the stored time zone string cannot be parsed into a `Tz`; the
/// time zone is expected to have been validated before reaching this point.
#[cfg(feature = "timezones")]
pub(super) fn dst_offset(s: &Series) -> PolarsResult<Series> {
    // Pull out the pieces we need, bailing early on any unsupported dtype.
    let (time_unit, tz_name) = match s.dtype() {
        DataType::Datetime(time_unit, Some(tz)) => (time_unit, tz),
        dt => polars_bail!(
            opq = dst_offset,
            got = dt,
            expected = "time-zone-aware datetime"
        ),
    };
    let tz = tz_name
        .parse::<Tz>()
        .expect("Time zone has already been validated");
    // The dtype was matched as `Datetime` above, so this downcast cannot fail.
    let datetimes = s.datetime().unwrap();
    Ok(dst_offset_fn(datetimes, time_unit, &tz).into_series())
}

pub(super) fn round(s: &Series, every: &str, offset: &str) -> PolarsResult<Series> {
let every = Duration::parse(every);
let offset = Duration::parse(offset);
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,8 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
MonthStart => map!(datetime::month_start),
#[cfg(feature = "date_offset")]
MonthEnd => map!(datetime::month_end),
#[cfg(feature = "timezones")]
DSTOffset => map!(datetime::dst_offset),
Round(every, offset) => map!(datetime::round, &every, &offset),
#[cfg(feature = "timezones")]
CastTimezone(tz, use_earliest) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ impl FunctionExpr {
MonthStart => mapper.with_same_dtype().unwrap().dtype,
#[cfg(feature = "date_offset")]
MonthEnd => mapper.with_same_dtype().unwrap().dtype,
#[cfg(feature = "timezones")]
DSTOffset => DataType::Duration(TimeUnit::Milliseconds),
Round(..) => mapper.with_same_dtype().unwrap().dtype,
#[cfg(feature = "timezones")]
CastTimezone(tz, _use_earliest) => {
Expand Down
27 changes: 27 additions & 0 deletions polars/polars-time/src/dst_offset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#[cfg(feature = "timezones")]
use chrono::TimeZone;
#[cfg(feature = "timezones")]
use chrono_tz::OffsetComponents;
#[cfg(feature = "timezones")]
use polars_arrow::time_zone::Tz;
#[cfg(feature = "timezones")]
use polars_core::prelude::*;
#[cfg(feature = "timezones")]
use polars_core::utils::arrow::temporal_conversions::{
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime,
};

/// Compute, for each timestamp in `ca`, the DST offset in effect in `time_zone`.
///
/// Each value is decoded as a UTC timestamp according to `time_unit`, localized
/// to `time_zone`, and the DST component of the resulting offset is taken in
/// milliseconds. The result is always a millisecond-precision duration,
/// regardless of `time_unit`.
#[cfg(feature = "timezones")]
pub fn dst_offset(ca: &DatetimeChunked, time_unit: &TimeUnit, time_zone: &Tz) -> DurationChunked {
    // Select the timestamp decoder that matches the precision of the input.
    let to_naive_utc = match time_unit {
        TimeUnit::Milliseconds => timestamp_ms_to_datetime,
        TimeUnit::Microseconds => timestamp_us_to_datetime,
        TimeUnit::Nanoseconds => timestamp_ns_to_datetime,
    };
    let offsets_ms = ca.0.apply(|timestamp| {
        let naive_utc = to_naive_utc(timestamp);
        let localized = time_zone.from_utc_datetime(&naive_utc);
        localized.offset().dst_offset().num_milliseconds()
    });
    offsets_ms.into_duration(TimeUnit::Milliseconds)
}
3 changes: 3 additions & 0 deletions polars/polars-time/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
pub mod chunkedarray;
mod date_range;
mod dst_offset;
mod groupby;
mod month_end;
mod month_start;
Expand All @@ -13,6 +14,8 @@ mod utils;
mod windows;

pub use date_range::*;
#[cfg(feature = "timezones")]
pub use dst_offset::*;
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
pub use groupby::dynamic::*;
pub use month_end::*;
Expand Down
30 changes: 30 additions & 0 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1972,3 +1972,33 @@ def month_end(self) -> Expr:
└─────────────────────┘
"""
return wrap_expr(self._pyexpr.dt_month_end())

def dst_offset(self) -> Expr:
    """
    Additional offset currently in effect (typically due to daylight saving time).

    Returns
    -------
    Duration expression

    Examples
    --------
    >>> from datetime import datetime
    >>> df = pl.DataFrame(
    ...     {
    ...         "ts": [datetime(2020, 10, 25), datetime(2020, 10, 26)],
    ...     }
    ... )
    >>> df = df.with_columns(pl.col("ts").dt.replace_time_zone("Europe/London"))
    >>> df.with_columns(pl.col("ts").dt.dst_offset().alias("dst_offset"))
    shape: (2, 2)
    ┌─────────────────────────────┬──────────────┐
    │ ts                          ┆ dst_offset   │
    │ ---                         ┆ ---          │
    │ datetime[μs, Europe/London] ┆ duration[ms] │
    ╞═════════════════════════════╪══════════════╡
    │ 2020-10-25 00:00:00 BST     ┆ 1h           │
    │ 2020-10-26 00:00:00 GMT     ┆ 0ms          │
    └─────────────────────────────┴──────────────┘

    """
    offset_pyexpr = self._pyexpr.dt_dst_offset()
    return wrap_expr(offset_pyexpr)
37 changes: 35 additions & 2 deletions py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1829,7 +1829,7 @@ def month_start(self) -> Series:
Returns
-------
Date/Datetime expression
Date/Datetime series
Notes
-----
Expand Down Expand Up @@ -1859,7 +1859,7 @@ def month_end(self) -> Series:
Returns
-------
Date/Datetime expression
Date/Datetime series
Notes
-----
Expand All @@ -1882,3 +1882,36 @@ def month_end(self) -> Series:
2000-04-30 02:00:00
]
"""

def dst_offset(self) -> Series:
    """
    Additional offset currently in effect (typically due to daylight saving time).

    Returns
    -------
    Duration Series

    Examples
    --------
    >>> from datetime import datetime
    >>> ser = pl.date_range(
    ...     datetime(2020, 10, 25),
    ...     datetime(2020, 10, 26),
    ...     time_zone="Europe/London",
    ...     eager=True,
    ... )
    >>> ser
    shape: (2,)
    Series: 'date' [datetime[μs, Europe/London]]
    [
            2020-10-25 00:00:00 BST
            2020-10-26 00:00:00 GMT
    ]
    >>> ser.dt.dst_offset().rename("dst_offset")
    shape: (2,)
    Series: 'dst_offset' [duration[ms]]
    [
            1h
            0ms
    ]

    """
5 changes: 5 additions & 0 deletions py-polars/src/expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ impl PyExpr {
self.inner.clone().dt().month_end().into()
}

// Python-facing binding: clones the wrapped expression and applies `dt().dst_offset()`.
#[cfg(feature = "timezones")]
fn dt_dst_offset(&self) -> Self {
    let expr = self.inner.clone();
    expr.dt().dst_offset().into()
}

fn dt_round(&self, every: &str, offset: &str) -> Self {
self.inner.clone().dt().round(every, offset).into()
}
Expand Down
38 changes: 38 additions & 0 deletions py-polars/tests/unit/namespaces/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,44 @@ def test_month_start_end_invalid() -> None:
ser.dt.month_end()


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_dst_offset(time_unit: TimeUnit) -> None:
    """Check `dst_offset` yields 1h then 0 in milliseconds for every input time unit."""
    london_days = pl.date_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    ser = london_days.dt.cast_time_unit(time_unit)

    one_hour_ms = 3_600 * 1_000
    expected = pl.Series("dst_offset", [one_hour_ms, 0], dtype=pl.Duration("ms"))

    result = ser.dt.dst_offset().rename("dst_offset")
    assert_series_equal(result, expected)


def test_dst_offset_lazy_schema() -> None:
    """Check the lazy schema reports `dst_offset` as a millisecond duration."""
    timestamps = pl.date_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    lazy_frame = pl.DataFrame({"ts": timestamps}).lazy()
    with_offset = lazy_frame.with_columns(dst_offset=pl.col("ts").dt.dst_offset())

    assert with_offset.schema == {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "dst_offset": pl.Duration(time_unit="ms"),
    }


def test_dst_offset_invalid() -> None:
    """Check `dst_offset` rejects a datetime Series that carries no time zone."""
    naive_ser = pl.date_range(
        datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True
    )
    expected_message = r"`dst_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)"
    with pytest.raises(InvalidOperationError, match=expected_message):
        naive_ser.dt.dst_offset().rename("dst_offset")


@pytest.mark.parametrize(
("time_unit", "expected"),
[
Expand Down

0 comments on commit 40af4dd

Please sign in to comment.