Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python,rust): extend dtype/selector matching for Datetime with a "*" wildcard for timezones #9641

Merged
merged 4 commits into from
Jul 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions polars/polars-lazy/polars-plan/src/logical_plan/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,20 @@ pub(super) fn replace_dtype_with_column(mut expr: Expr, column_name: Arc<str>) -
expr
}

/// Reports whether two dtypes count as the same for dtype-based matching.
///
/// Two `Datetime` dtypes match when their time units are equal and their
/// timezones are either equal, or one side is the `"*"` wildcard while the
/// other side has a timezone set. Any other pair of dtypes must be exactly
/// equal.
fn dtypes_match(d1: &DataType, d2: &DataType) -> bool {
    if let (DataType::Datetime(unit_a, zone_a), DataType::Datetime(unit_b, zone_b)) = (d1, d2) {
        if unit_a != unit_b {
            return false;
        }
        if zone_a == zone_b {
            return true;
        }
        let a_is_wildcard = zone_a.as_deref() == Some("*");
        let b_is_wildcard = zone_b.as_deref() == Some("*");
        // the wildcard only stands in for a timezone that is actually set;
        // it never matches a timezone-less (`None`) Datetime
        return (a_is_wildcard && zone_b.is_some()) || (b_is_wildcard && zone_a.is_some());
    }
    // everything that is not a Datetime/Datetime pair needs an exact match
    d1 == d2
}

/// replace `DtypeColumn` with `col("foo")..col("bar")`
fn expand_dtypes(
expr: &Expr,
Expand All @@ -205,10 +219,10 @@ fn expand_dtypes(
) -> PolarsResult<()> {
// note: we loop over the schema to guarantee that we return a stable
// field-order, irrespective of which dtypes are filtered against
for field in schema
.iter_fields()
.filter(|f| (dtypes.contains(&f.dtype) && !exclude.contains(f.name().as_str())))
{
for field in schema.iter_fields().filter(|f| {
dtypes.iter().any(|dtype| dtypes_match(dtype, &f.dtype))
&& !exclude.contains(f.name().as_str())
}) {
let name = field.name();
let new_expr = expr.clone();
let new_expr = replace_dtype_with_column(new_expr, Arc::from(name.as_str()));
Expand All @@ -230,10 +244,9 @@ fn prepare_excluded(
if let Expr::Exclude(_, to_exclude) = e {
#[cfg(feature = "regex")]
{
// instead of matching the names for regex patterns
// and expanding the matches in the schema we
// reuse the `replace_regex` function. This is a bit
// slower but DRY.
// instead of matching the names for regex patterns and
// expanding the matches in the schema we reuse the
// `replace_regex` func; this is a bit slower but DRY.
let mut buf = vec![];
for to_exclude_single in to_exclude {
match to_exclude_single {
Expand All @@ -249,7 +262,7 @@ fn prepare_excluded(
}
Excluded::Dtype(dt) => {
for fld in schema.iter_fields() {
if fld.data_type() == dt {
if dtypes_match(fld.data_type(), dt) {
alexander-beedie marked this conversation as resolved.
Show resolved Hide resolved
exclude.insert(Arc::from(fld.name().as_ref()));
}
}
Expand Down
6 changes: 4 additions & 2 deletions py-polars/polars/datatypes/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,12 @@ def __init__(
Parameters
----------
time_unit : {'us', 'ns', 'ms'}
Unit of time.
Unit of time / precision.
time_zone
Time zone string as defined in zoneinfo (run
Time zone string, as defined in zoneinfo (to see valid strings run
``import zoneinfo; zoneinfo.available_timezones()`` for a full list).
When used to match dtypes, "*" can be given to select Datetime columns
that have any timezone set.

"""
if isinstance(time_zone, timezone):
Expand Down
3 changes: 3 additions & 0 deletions py-polars/polars/datatypes/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
Datetime("ms"),
Datetime("us"),
Datetime("ns"),
Datetime("ms", "*"),
Datetime("us", "*"),
Datetime("ns", "*"),
]
)
DURATION_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
Expand Down
Loading