Skip to content

Commit

Permalink
Support coercing strings to intervals (for date_bin and others)
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Jan 31, 2023
1 parent c7e09e6 commit b54dc5a
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 4 deletions.
58 changes: 58 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTA
----
2022-08-03T14:30:00

# Can coerce string interval arguments
query T
SELECT DATE_BIN('15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:00:00Z') AS res
----
2022-08-03T14:30:00

# Can coerce all string arguments
query T
SELECT DATE_BIN('15 minutes', '2022-08-03 14:38:50Z', '1970-01-01T00:00:00Z') AS res
----
2022-08-03T14:30:00

# Shift forward by 5 minutes
query T
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:05:00Z') AS res
Expand Down Expand Up @@ -131,3 +143,49 @@ FROM (
(TIMESTAMP '2021-06-10 17:05:00Z', TIMESTAMP '2001-01-01T00:00:00Z', 0.5),
(TIMESTAMP '2021-06-10 17:19:10Z', TIMESTAMP '2001-01-01T00:00:00Z', 0.3)
) as t (time, origin, val)

###
## test date_trunc function
###
query T
SELECT DATE_TRUNC('year', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-01-01T00:00:00

query T
SELECT DATE_TRUNC('quarter', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-07-01T00:00:00

query T
SELECT DATE_TRUNC('month', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-08-01T00:00:00

query T
SELECT DATE_TRUNC('day', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-08-03T00:00:00

query T
SELECT DATE_TRUNC('hour', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-08-03T14:00:00

query T
SELECT DATE_TRUNC('minute', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-08-03T14:38:00

query T
SELECT DATE_TRUNC('second', TIMESTAMP '2022-08-03 14:38:50Z');
----
2022-08-03T14:38:50


# Demonstrate that strings are automatically coerced to timestamps (don't use TIMESTAMP)

query T
SELECT DATE_TRUNC('second', '2022-08-03 14:38:50Z');
----
2022-08-03T14:38:50
11 changes: 8 additions & 3 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ use arrow::{
};
use datafusion_common::{DataFusionError, Result};

/// Performs type coercion for functions Returns the data types that
/// each argument must be coerced to match `signature`.
/// Performs type coercion for function arguments.
///
/// Returns the data types to which each argument must be coerced to
/// match `signature`.
///
/// For more details on coercion in general, please see the
/// [`type_coercion`](crate::type_coercion) module.
Expand Down Expand Up @@ -175,7 +177,10 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
| Decimal128(_, _)
),
Timestamp(TimeUnit::Nanosecond, None) => {
matches!(type_from, Null | Timestamp(_, None))
matches!(type_from, Null | Timestamp(_, None) | Utf8 | LargeUtf8)
}
Interval(_) => {
matches!(type_from, Utf8 | LargeUtf8)
}
Utf8 | LargeUtf8 => true,
Null => can_cast_types(type_from, type_into),
Expand Down
14 changes: 13 additions & 1 deletion datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,10 +553,22 @@ fn coerce_arguments_for_signature(
expressions
.iter()
.enumerate()
.map(|(i, expr)| expr.clone().cast_to(&new_types[i], schema))
.map(|(i, expr)| cast_expr(expr, &new_types[i], schema))
.collect::<Result<Vec<_>>>()
}

/// Casts `expr` to `to_type`, if possible.
///
/// A string literal (`Utf8` or `LargeUtf8`) whose target type is any
/// `Interval` is parsed directly into an interval literal with
/// `parse_interval` rather than being wrapped in a cast. Every other
/// combination is delegated to `Expr::cast_to` with the supplied `schema`.
///
/// # Errors
///
/// Returns whatever error `parse_interval` reports for a string literal
/// that it cannot parse, or the error produced by `cast_to` otherwise.
fn cast_expr(expr: &Expr, to_type: &DataType, schema: &DFSchema) -> Result<Expr> {
    // Special case until Interval coercion is handled in arrow-rs.
    // TODO: file a ticket to track removing this special case.
    match (expr, to_type) {
        // Both string flavours are accepted so that this stays in step with
        // the coercion rules, which allow `Utf8` and `LargeUtf8` to be
        // coerced to `Interval`.
        //
        // NOTE(review): the interval unit carried by `to_type` is ignored
        // here; the resulting literal has whatever unit `parse_interval`
        // yields -- confirm that is what callers expect.
        (
            Expr::Literal(ScalarValue::Utf8(Some(s)) | ScalarValue::LargeUtf8(Some(s))),
            DataType::Interval(_),
        ) => parse_interval("millisecond", s.as_str()).map(Expr::Literal),
        _ => expr.clone().cast_to(to_type, schema),
    }
}

/// Returns the coerced exprs for each `input_exprs`.
/// Get the coerced data type from `aggregate_rule::coerce_types` and add `try_cast` if the
/// data type of `input_exprs` needs to be coerced.
Expand Down

0 comments on commit b54dc5a

Please sign in to comment.