Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust, python): add dt.dst_offset feature #9629

Merged
merged 6 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,13 @@ impl DateLikeNameSpace {
.map_private(FunctionExpr::TemporalExpr(TemporalFunction::MonthEnd))
}

/// Additional offset from UTC currently in effect (usually due to daylight saving time).
///
/// Builds an expression that applies `TemporalFunction::DSTOffset` to the wrapped expression.
#[cfg(feature = "timezones")]
pub fn dst_offset(self) -> Expr {
    let function = FunctionExpr::TemporalExpr(TemporalFunction::DSTOffset);
    self.0.map_private(function)
}

pub fn round<S: AsRef<str>>(self, every: S, offset: S) -> Expr {
let every = every.as_ref().into();
let offset = offset.as_ref().into();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub enum TemporalFunction {
MonthStart,
#[cfg(feature = "date_offset")]
MonthEnd,
#[cfg(feature = "timezones")]
DSTOffset,
Round(String, String),
#[cfg(feature = "timezones")]
CastTimezone(Option<TimeZone>, Option<bool>),
Expand Down Expand Up @@ -77,6 +79,8 @@ impl Display for TemporalFunction {
MonthStart => "month_start",
#[cfg(feature = "date_offset")]
MonthEnd => "month_end",
#[cfg(feature = "timezones")]
DSTOffset => "dst_offset",
Round(..) => "round",
#[cfg(feature = "timezones")]
CastTimezone(_, _) => "replace_timezone",
Expand Down Expand Up @@ -254,6 +258,23 @@ pub(super) fn month_end(s: &Series) -> PolarsResult<Series> {
})
}

/// Compute the DST offset for every value of a time-zone-aware datetime `Series`.
///
/// # Errors
/// Bails with an "operation not supported" error when the dtype of `s` is anything
/// other than a `Datetime` that carries a time zone.
///
/// # Panics
/// Panics if the time zone string stored in the dtype cannot be parsed into a `Tz`.
#[cfg(feature = "timezones")]
pub(super) fn dst_offset(s: &Series) -> PolarsResult<Series> {
    match s.dtype() {
        DataType::Datetime(_, Some(zone_name)) => {
            let zone: Tz = zone_name
                .parse()
                .expect("Time zone has already been validated");
            // The dtype was matched as `Datetime` above, so this downcast is expected to succeed.
            let datetimes = s.datetime().unwrap();
            let offsets = datetimes.dst_offset(&zone);
            Ok(offsets.into_series())
        }
        other => polars_bail!(
            opq = dst_offset,
            got = other,
            expected = "time-zone-aware datetime"
        ),
    }
}

pub(super) fn round(s: &Series, every: &str, offset: &str) -> PolarsResult<Series> {
let every = Duration::parse(every);
let offset = Duration::parse(offset);
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,8 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
MonthStart => map!(datetime::month_start),
#[cfg(feature = "date_offset")]
MonthEnd => map!(datetime::month_end),
#[cfg(feature = "timezones")]
DSTOffset => map!(datetime::dst_offset),
Round(every, offset) => map!(datetime::round, &every, &offset),
#[cfg(feature = "timezones")]
CastTimezone(tz, use_earliest) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ impl FunctionExpr {
MonthStart => mapper.with_same_dtype().unwrap().dtype,
#[cfg(feature = "date_offset")]
MonthEnd => mapper.with_same_dtype().unwrap().dtype,
#[cfg(feature = "timezones")]
DSTOffset => DataType::Duration(TimeUnit::Milliseconds),
Round(..) => mapper.with_same_dtype().unwrap().dtype,
#[cfg(feature = "timezones")]
CastTimezone(tz, _use_earliest) => {
Expand Down
37 changes: 37 additions & 0 deletions polars/polars-time/src/dst_offset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#[cfg(feature = "timezones")]
use chrono::TimeZone;
#[cfg(feature = "timezones")]
use chrono_tz::OffsetComponents;
#[cfg(feature = "timezones")]
use polars_arrow::time_zone::Tz;
#[cfg(feature = "timezones")]
use polars_core::prelude::*;
#[cfg(feature = "timezones")]
use polars_core::utils::arrow::temporal_conversions::{
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime,
};

#[cfg(feature = "timezones")]
pub trait PolarsDSTOffset {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that we only dispatch via the expression API, shall we simply make this a function that accepts a `&DatetimeChunked`?

fn dst_offset(&self, time_zone: &Tz) -> DurationChunked
where
Self: Sized;
}

#[cfg(feature = "timezones")]
impl PolarsDSTOffset for DatetimeChunked {
    /// Return, for each timestamp, the DST component of the offset that `time_zone`
    /// applies at that instant, as a `Duration` in milliseconds.
    fn dst_offset(&self, time_zone: &Tz) -> DurationChunked {
        // Pick the conversion that matches the unit the physical timestamps are stored in.
        let to_naive = match self.time_unit() {
            TimeUnit::Milliseconds => timestamp_ms_to_datetime,
            TimeUnit::Microseconds => timestamp_us_to_datetime,
            TimeUnit::Nanoseconds => timestamp_ns_to_datetime,
        };
        let offsets_ms = self.0.apply(|timestamp| {
            // The stored timestamp is interpreted as UTC and localized into `time_zone`.
            let naive_utc = to_naive(timestamp);
            let localized = time_zone.from_utc_datetime(&naive_utc);
            let dst = localized.offset().dst_offset();
            dst.num_milliseconds()
        });
        // The output is always millisecond precision, regardless of the input time unit.
        offsets_ms.into_duration(TimeUnit::Milliseconds)
    }
}
3 changes: 3 additions & 0 deletions polars/polars-time/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
pub mod chunkedarray;
mod date_range;
mod dst_offset;
mod groupby;
mod month_end;
mod month_start;
Expand All @@ -13,6 +14,8 @@ mod utils;
mod windows;

pub use date_range::*;
#[cfg(feature = "timezones")]
pub use dst_offset::*;
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
pub use groupby::dynamic::*;
pub use month_end::*;
Expand Down
30 changes: 30 additions & 0 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1972,3 +1972,33 @@ def month_end(self) -> Expr:
└─────────────────────┘
"""
return wrap_expr(self._pyexpr.dt_month_end())

def dst_offset(self) -> Expr:
    """
    Extra UTC offset currently in effect (typically due to daylight saving time).

    Returns
    -------
    Duration expression

    Examples
    --------
    >>> from datetime import datetime
    >>> df = pl.DataFrame(
    ...     {
    ...         "ts": [datetime(2020, 10, 25), datetime(2020, 10, 26)],
    ...     }
    ... )
    >>> df = df.with_columns(pl.col("ts").dt.replace_time_zone("Europe/London"))
    >>> df.with_columns(pl.col("ts").dt.dst_offset().alias("dst_offset"))
    shape: (2, 2)
    ┌─────────────────────────────┬──────────────┐
    │ ts                          ┆ dst_offset   │
    │ ---                         ┆ ---          │
    │ datetime[μs, Europe/London] ┆ duration[ms] │
    ╞═════════════════════════════╪══════════════╡
    │ 2020-10-25 00:00:00 BST     ┆ 1h           │
    │ 2020-10-26 00:00:00 GMT     ┆ 0ms          │
    └─────────────────────────────┴──────────────┘
    """
    offset_pyexpr = self._pyexpr.dt_dst_offset()
    return wrap_expr(offset_pyexpr)
37 changes: 35 additions & 2 deletions py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1829,7 +1829,7 @@ def month_start(self) -> Series:

Returns
-------
Date/Datetime expression
Date/Datetime series

Notes
-----
Expand Down Expand Up @@ -1859,7 +1859,7 @@ def month_end(self) -> Series:

Returns
-------
Date/Datetime expression
Date/Datetime series.

Notes
-----
Expand All @@ -1882,3 +1882,36 @@ def month_end(self) -> Series:
2000-04-30 02:00:00
]
"""

def dst_offset(self) -> Series:
    """
    Additional offset currently in effect (typically due to daylight saving time).

    Returns
    -------
    Duration Series
        One value per element, with millisecond time unit.

    Examples
    --------
    >>> from datetime import datetime
    >>> ser = pl.date_range(
    ...     datetime(2020, 10, 25),
    ...     datetime(2020, 10, 26),
    ...     time_zone="Europe/London",
    ...     eager=True,
    ... )
    >>> ser
    shape: (2,)
    Series: 'date' [datetime[μs, Europe/London]]
    [
            2020-10-25 00:00:00 BST
            2020-10-26 00:00:00 GMT
    ]
    >>> ser.dt.dst_offset().rename("dst_offset")
    shape: (2,)
    Series: 'dst_offset' [duration[ms]]
    [
            1h
            0ms
    ]
    """
5 changes: 5 additions & 0 deletions py-polars/src/expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ impl PyExpr {
self.inner.clone().dt().month_end().into()
}

/// Python-facing binding that forwards to `dt().dst_offset()` on a clone of the inner expression.
#[cfg(feature = "timezones")]
fn dt_dst_offset(&self) -> Self {
    let expr = self.inner.clone();
    expr.dt().dst_offset().into()
}

fn dt_round(&self, every: &str, offset: &str) -> Self {
self.inner.clone().dt().round(every, offset).into()
}
Expand Down
38 changes: 38 additions & 0 deletions py-polars/tests/unit/namespaces/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,44 @@ def test_month_start_end_invalid() -> None:
ser.dt.month_end()


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_dst_offset(time_unit: TimeUnit) -> None:
    """`dst_offset` yields millisecond durations for every input time unit."""
    london_range = pl.date_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    ser = london_range.dt.cast_time_unit(time_unit)

    # First value is still in BST (one hour of DST); the second is in GMT (none).
    one_hour_ms = 3_600 * 1_000
    expected = pl.Series("dst_offset", [one_hour_ms, 0], dtype=pl.Duration("ms"))

    result = ser.dt.dst_offset().rename("dst_offset")
    assert_series_equal(result, expected)


def test_dst_offset_lazy_schema() -> None:
    """The lazy schema reports `dst_offset` as a millisecond Duration column."""
    london_range = pl.date_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    lazy_frame = pl.DataFrame({"ts": london_range}).lazy()
    with_offset = lazy_frame.with_columns(dst_offset=pl.col("ts").dt.dst_offset())

    assert with_offset.schema == {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "dst_offset": pl.Duration(time_unit="ms"),
    }


def test_dst_offset_invalid() -> None:
    """`dst_offset` rejects a datetime Series that carries no time zone."""
    start, end = datetime(2020, 10, 25), datetime(2020, 10, 26)
    naive = pl.date_range(start, end, eager=True)
    expected_message = r"`dst_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)"
    with pytest.raises(InvalidOperationError, match=expected_message):
        naive.dt.dst_offset().rename("dst_offset")


@pytest.mark.parametrize(
("time_unit", "expected"),
[
Expand Down