Skip to content

Commit

Permalink
refactor: separate get_common_type / get_result_type for temporal type (
Browse files Browse the repository at this point in the history
  • Loading branch information
jackwener authored May 5, 2023
1 parent dbe78f4 commit cf233c8
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 110 deletions.
4 changes: 2 additions & 2 deletions datafusion/core/tests/sqllogictests/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ query error DataFusion error: Error during planning: Unsupported argument types\
SELECT DATE '2023-04-09' - DATE '2023-04-02';

# DATE minus Timestamp
query P
query ?
SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
----
0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs

# Timestamp minus DATE
query P
query ?
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
----
0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs
12 changes: 6 additions & 6 deletions datafusion/core/tests/sqllogictests/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,10 @@ select '1 month'::interval + '1980-01-01T12:00:00'::timestamp;

# Exected error: interval (scalar) - date / timestamp (scalar)

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - '1980-01-01'::date;

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - '1980-01-01T12:00:00'::timestamp;

# interval (array) + date / timestamp (array)
Expand All @@ -349,10 +349,10 @@ select i + ts from t;
2000-02-01T00:01:00

# expected error interval (array) - date / timestamp (array)
query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to
select i - d from t;

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
select i - ts from t;


Expand All @@ -372,10 +372,10 @@ select '1 month'::interval + ts from t;
2000-03-01T00:00:00

# expected error interval (scalar) - date / timestamp (array)
query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - d from t;

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - ts from t;

statement ok
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/sqllogictests/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ SELECT ts1 + i FROM foo;
2003-07-12T01:31:15.000123463

# Timestamp + Timestamp => error
statement error DataFusion error: Error during planning: Unsupported argument types\. Can not evaluate Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\)
query error DataFusion error: type_coercion\ncaused by\nInternal error: Unsupported operation Plus between Timestamp\(Nanosecond, None\) and Timestamp\(Nanosecond, None\)\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
SELECT ts1 + ts2
FROM foo;

Expand All @@ -1031,7 +1031,7 @@ SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp;
0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs

# Interval - Timestamp => error
statement error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
statement error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
SELECT i - ts1 from FOO;

statement ok
Expand Down
180 changes: 80 additions & 100 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,70 @@
use arrow::compute::can_cast_types;
use arrow::datatypes::{
DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
};

use datafusion_common::DataFusionError;
use datafusion_common::Result;

use crate::type_coercion::{
is_datetime, is_decimal, is_interval, is_numeric, is_timestamp,
};
use crate::type_coercion::{is_datetime, is_decimal, is_interval, is_numeric};
use crate::Operator;

/// Returns the result type of applying mathematics operations such as
/// `+` to arguments of `lhs_type` and `rhs_type`.
fn mathematics_temporal_result_type(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
use arrow::datatypes::IntervalUnit::*;
use arrow::datatypes::TimeUnit::*;

if !is_interval(lhs_type)
&& !is_interval(rhs_type)
&& !is_datetime(lhs_type)
&& !is_datetime(rhs_type)
{
return None;
};

match (lhs_type, rhs_type) {
// datetime +/- interval
(Interval(_), Timestamp(_, _)) => Some(rhs_type.clone()),
(Timestamp(_, _), Interval(_)) => Some(lhs_type.clone()),
(Interval(_), Date32) => Some(rhs_type.clone()),
(Date32, Interval(_)) => Some(lhs_type.clone()),
(Interval(_), Date64) => Some(rhs_type.clone()),
(Date64, Interval(_)) => Some(lhs_type.clone()),
// interval +/-
(Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)),
(Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
(Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
// timestamp - timestamp
(Timestamp(Second, _), Timestamp(Second, _))
| (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => {
Some(Interval(DayTime))
}
(Timestamp(Microsecond, _), Timestamp(Microsecond, _))
| (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => {
Some(Interval(MonthDayNano))
}
(Timestamp(_, _), Timestamp(_, _)) => None,
// TODO: date minus date
// date - timestamp, timestamp - date
(Date32, Timestamp(_, _))
| (Timestamp(_, _), Date32)
| (Date64, Timestamp(_, _))
| (Timestamp(_, _), Date64) => {
// TODO: make get_result_type must after coerce type.
// if type isn't coerced, we need get common type, and then get result type.
let common_type = temporal_coercion(lhs_type, rhs_type);
common_type.and_then(|t| mathematics_temporal_result_type(&t, &t))
}
_ => None,
}
}

/// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types
pub fn get_result_type(
lhs_type: &DataType,
Expand Down Expand Up @@ -67,12 +120,12 @@ pub fn get_result_type(
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => Some(DataType::Boolean),
Operator::Plus | Operator::Minus
if is_datetime(lhs_type)
|| is_datetime(rhs_type)
|| is_interval(lhs_type)
|| is_interval(rhs_type) =>
if is_datetime(lhs_type) && is_datetime(rhs_type)
|| (is_interval(lhs_type) && is_interval(rhs_type))
|| (is_datetime(lhs_type) && is_interval(rhs_type))
|| (is_interval(lhs_type) && is_datetime(rhs_type)) =>
{
temporal_add_sub_coercion(lhs_type, rhs_type, op)
mathematics_temporal_result_type(lhs_type, rhs_type)
}
// following same with `coerce_types`
Operator::BitwiseAnd
Expand Down Expand Up @@ -126,19 +179,13 @@ pub fn coerce_types(
| Operator::LtEq
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => comparison_coercion(lhs_type, rhs_type),
// interval - timestamp is an erroneous case, cannot coerce a type
Operator::Plus | Operator::Minus
if is_datetime(lhs_type)
|| is_datetime(rhs_type)
|| is_interval(lhs_type)
|| is_interval(rhs_type) =>
if is_interval(lhs_type) && is_interval(rhs_type) =>
{
if is_interval(lhs_type) && is_datetime(rhs_type) && *op == Operator::Minus {
return Err(DataFusionError::Plan(
"interval can't subtract timestamp/date".to_string(),
));
}
temporal_add_sub_coercion(lhs_type, rhs_type, op)
temporal_coercion(lhs_type, rhs_type)
}
Operator::Minus if is_datetime(lhs_type) && is_datetime(rhs_type) => {
temporal_coercion(lhs_type, rhs_type)
}
// for math expressions, the final value of the coercion is also the return type
// because coercion favours higher information types
Expand Down Expand Up @@ -208,7 +255,10 @@ pub fn math_decimal_coercion(

/// Returns the output type of applying bitwise operations such as
/// `&`, `|`, or `xor`to arguments of `lhs_type` and `rhs_type`.
fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataType> {
pub(crate) fn bitwise_coercion(
left_type: &DataType,
right_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;

if !both_numeric_or_null_and_numeric(left_type, right_type) {
Expand Down Expand Up @@ -260,80 +310,6 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
}

// This function performs temporal coercion between the two input data types and the provided operator.
// It returns None (it will convert a Err outside) if the operands are an unsupported/wrong operation.
// If the coercion is possible, it returns a new data type as Some(DataType).
pub fn temporal_add_sub_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
op: &Operator,
) -> Option<DataType> {
match (lhs_type, rhs_type, op) {
// if an interval is being added/subtracted from a date/timestamp, return the date/timestamp data type
(lhs, rhs, _) if is_interval(lhs) && is_datetime(rhs) => Some(rhs.clone()),
(lhs, rhs, _) if is_interval(rhs) && is_datetime(lhs) => Some(lhs.clone()),
// if two timestamps are being subtracted, check their time units and return the corresponding interval data type
(lhs, rhs, Operator::Minus) if is_timestamp(lhs) && is_timestamp(rhs) => {
handle_timestamp_minus(lhs, rhs)
}
// if two intervals are being added/subtracted, check their interval units and return the corresponding interval data type
(lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => {
handle_interval_addition(lhs, rhs)
}
(lhs, rhs, Operator::Minus) if is_datetime(lhs) && is_datetime(rhs) => {
temporal_coercion(lhs, rhs)
}
// return None if no coercion is possible
_ => None,
}
}

// This function checks if two interval data types have the same interval unit and returns an interval data type
// representing the sum of them. If the two interval data types have different units, it returns an interval data type
// with "IntervalUnit::MonthDayNano". If the two interval data types are already "IntervalUnit::YearMonth" or "IntervalUnit::DayTime",
// it returns an interval data type with the same unit as the operands.
fn handle_interval_addition(lhs: &DataType, rhs: &DataType) -> Option<DataType> {
match (lhs, rhs) {
// operation with the same types
(
DataType::Interval(IntervalUnit::YearMonth),
DataType::Interval(IntervalUnit::YearMonth),
) => Some(DataType::Interval(IntervalUnit::YearMonth)),
(
DataType::Interval(IntervalUnit::DayTime),
DataType::Interval(IntervalUnit::DayTime),
) => Some(DataType::Interval(IntervalUnit::DayTime)),
// operation with MonthDayNano's or different types
(_, _) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
}
}

// This function checks if two timestamp data types have the same time unit and returns an interval data type
// representing the difference between them, either "IntervalUnit::DayTime" if the time unit is second or millisecond,
// or "IntervalUnit::MonthDayNano" if the time unit is microsecond or nanosecond. If the two timestamp data types have
// different time units, it returns an error indicating that "The timestamps have different types".
fn handle_timestamp_minus(lhs: &DataType, rhs: &DataType) -> Option<DataType> {
match (lhs, rhs) {
(
DataType::Timestamp(TimeUnit::Second, _),
DataType::Timestamp(TimeUnit::Second, _),
)
| (
DataType::Timestamp(TimeUnit::Millisecond, _),
DataType::Timestamp(TimeUnit::Millisecond, _),
) => Some(DataType::Interval(IntervalUnit::DayTime)),
(
DataType::Timestamp(TimeUnit::Microsecond, _),
DataType::Timestamp(TimeUnit::Microsecond, _),
)
| (
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
(_, _) => None,
}
}

/// Returns the output type of applying numeric operations such as `=`
/// to arguments `lhs_type` and `rhs_type` if one is numeric and one
/// is `Utf8`/`LargeUtf8`.
Expand Down Expand Up @@ -777,9 +753,15 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool {

/// Coercion rules for Temporal columns: the type that both lhs and rhs can be
/// casted to for the purpose of a date computation
/// For interval arithmetic, it doesn't handle datetime type +/- interval
fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
use arrow::datatypes::IntervalUnit::*;
match (lhs_type, rhs_type) {
// interval +/-
(Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)),
(Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
(Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
(Date64, Date32) | (Date32, Date64) => Some(Date64),
(Utf8, Date32) | (Date32, Utf8) => Some(Date32),
(Utf8, Date64) | (Date64, Utf8) => Some(Date64),
Expand Down Expand Up @@ -1055,14 +1037,12 @@ mod tests {

#[test]
fn test_date_timestamp_arithmetic_error() -> Result<()> {
let err = coerce_types(
let common_type = coerce_types(
&DataType::Timestamp(TimeUnit::Nanosecond, None),
&Operator::Minus,
&DataType::Timestamp(TimeUnit::Millisecond, None),
)
.unwrap_err()
.to_string();
assert_contains!(&err, "Timestamp(Nanosecond, None) - Timestamp(Millisecond, None) can't be evaluated because there isn't a common type to coerce the types to");
)?;
assert_eq!(common_type.to_string(), "Timestamp(Millisecond, None)");

let err = coerce_types(&DataType::Date32, &Operator::Plus, &DataType::Date64)
.unwrap_err()
Expand Down

0 comments on commit cf233c8

Please sign in to comment.