diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt b/datafusion/core/tests/sqllogictests/test_files/dates.slt index 0cb96d8bc775..bfeb92fd5960 100644 --- a/datafusion/core/tests/sqllogictests/test_files/dates.slt +++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt @@ -94,13 +94,13 @@ query error DataFusion error: Error during planning: Unsupported argument types\ SELECT DATE '2023-04-09' - DATE '2023-04-02'; # DATE minus Timestamp -query P +query ? SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp; ---- 0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs # Timestamp minus DATE -query P +query ? SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs diff --git a/datafusion/core/tests/sqllogictests/test_files/interval.slt b/datafusion/core/tests/sqllogictests/test_files/interval.slt index 15b18f51ddc2..82131f5d2a26 100644 --- a/datafusion/core/tests/sqllogictests/test_files/interval.slt +++ b/datafusion/core/tests/sqllogictests/test_files/interval.slt @@ -327,10 +327,10 @@ select '1 month'::interval + '1980-01-01T12:00:00'::timestamp; # Exected error: interval (scalar) - date / timestamp (scalar) -query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to select '1 month'::interval - '1980-01-01'::date; -query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to select '1 month'::interval - '1980-01-01T12:00:00'::timestamp; # interval (array) + date / timestamp (array) @@ -349,10 +349,10 @@ select i + ts from t; 2000-02-01T00:01:00 # expected error interval (array) - date / timestamp (array) -query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to select i - d from t; -query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to select i - ts from t; @@ -372,10 +372,10 @@ select '1 month'::interval + ts from t; 2000-03-01T00:00:00 # expected error interval (scalar) - date / timestamp (array) -query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to select '1 month'::interval - d from t; -query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to select '1 month'::interval - ts from t; statement ok diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt index e1102d626ccf..42a5f8e3a488 100644 --- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt +++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt @@ -1014,7 +1014,7 @@ SELECT ts1 + i FROM foo; 2003-07-12T01:31:15.000123463 # Timestamp + Timestamp => error -statement error DataFusion error: Error during planning: Unsupported argument types\. Can not evaluate Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\) +query error DataFusion error: type_coercion\ncaused by\nInternal error: Unsupported operation Plus between Timestamp\(Nanosecond, None\) and Timestamp\(Nanosecond, None\)\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker SELECT ts1 + ts2 FROM foo; @@ -1031,7 +1031,7 @@ SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp; 0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs # Interval - Timestamp => error -statement error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date +statement error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to SELECT i - ts1 from FOO; statement ok diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 006b56c7feee..f01b06e0db06 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -19,17 +19,70 @@ use arrow::compute::can_cast_types; use arrow::datatypes::{ - DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, }; use datafusion_common::DataFusionError; use datafusion_common::Result; -use crate::type_coercion::{ - is_datetime, is_decimal, is_interval, is_numeric, is_timestamp, -}; +use crate::type_coercion::{is_datetime, is_decimal, is_interval, is_numeric}; use crate::Operator; +/// Returns the result type of applying mathematics operations such as +/// `+` to arguments of `lhs_type` and `rhs_type`. +fn mathematics_temporal_result_type( + lhs_type: &DataType, + rhs_type: &DataType, +) -> Option { + use arrow::datatypes::DataType::*; + use arrow::datatypes::IntervalUnit::*; + use arrow::datatypes::TimeUnit::*; + + if !is_interval(lhs_type) + && !is_interval(rhs_type) + && !is_datetime(lhs_type) + && !is_datetime(rhs_type) + { + return None; + }; + + match (lhs_type, rhs_type) { + // datetime +/- interval + (Interval(_), Timestamp(_, _)) => Some(rhs_type.clone()), + (Timestamp(_, _), Interval(_)) => Some(lhs_type.clone()), + (Interval(_), Date32) => Some(rhs_type.clone()), + (Date32, Interval(_)) => Some(lhs_type.clone()), + (Interval(_), Date64) => Some(rhs_type.clone()), + (Date64, Interval(_)) => Some(lhs_type.clone()), + // interval +/- + (Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)), + (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)), + (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)), + // timestamp - timestamp + (Timestamp(Second, _), Timestamp(Second, _)) + | (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => { + Some(Interval(DayTime)) + } + (Timestamp(Microsecond, _), Timestamp(Microsecond, _)) + | (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => { + Some(Interval(MonthDayNano)) + } + (Timestamp(_, _), Timestamp(_, _)) => None, + // TODO: date minus date + // date - timestamp, timestamp - date + (Date32, Timestamp(_, _)) + | (Timestamp(_, _), Date32) + | (Date64, Timestamp(_, _)) + | (Timestamp(_, _), Date64) => { + // TODO: make get_result_type must after coerce type. + // if type isn't coerced, we need get common type, and then get result type. + let common_type = temporal_coercion(lhs_type, rhs_type); + common_type.and_then(|t| mathematics_temporal_result_type(&t, &t)) + } + _ => None, + } +} + /// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types pub fn get_result_type( lhs_type: &DataType, @@ -67,12 +120,12 @@ pub fn get_result_type( | Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => Some(DataType::Boolean), Operator::Plus | Operator::Minus - if is_datetime(lhs_type) - || is_datetime(rhs_type) - || is_interval(lhs_type) - || is_interval(rhs_type) => + if is_datetime(lhs_type) && is_datetime(rhs_type) + || (is_interval(lhs_type) && is_interval(rhs_type)) + || (is_datetime(lhs_type) && is_interval(rhs_type)) + || (is_interval(lhs_type) && is_datetime(rhs_type)) => { - temporal_add_sub_coercion(lhs_type, rhs_type, op) + mathematics_temporal_result_type(lhs_type, rhs_type) } // following same with `coerce_types` Operator::BitwiseAnd @@ -126,19 +179,13 @@ pub fn coerce_types( | Operator::LtEq | Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => comparison_coercion(lhs_type, rhs_type), - // interval - timestamp is an erroneous case, cannot coerce a type Operator::Plus | Operator::Minus - if is_datetime(lhs_type) - || is_datetime(rhs_type) - || is_interval(lhs_type) - || is_interval(rhs_type) => + if is_interval(lhs_type) && is_interval(rhs_type) => { - if is_interval(lhs_type) && is_datetime(rhs_type) && *op == Operator::Minus { - return Err(DataFusionError::Plan( - "interval can't subtract timestamp/date".to_string(), - )); - } - temporal_add_sub_coercion(lhs_type, rhs_type, op) + temporal_coercion(lhs_type, rhs_type) + } + Operator::Minus if is_datetime(lhs_type) && is_datetime(rhs_type) => { + temporal_coercion(lhs_type, rhs_type) } // for math expressions, the final value of the coercion is also the return type // because coercion favours higher information types @@ -208,7 +255,10 @@ pub fn math_decimal_coercion( /// Returns the output type of applying bitwise operations such as /// `&`, `|`, or `xor`to arguments of `lhs_type` and `rhs_type`. -fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option { +pub(crate) fn bitwise_coercion( + left_type: &DataType, + right_type: &DataType, +) -> Option { use arrow::datatypes::DataType::*; if !both_numeric_or_null_and_numeric(left_type, right_type) { @@ -260,80 +310,6 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { - match (lhs_type, rhs_type, op) { - // if an interval is being added/subtracted from a date/timestamp, return the date/timestamp data type - (lhs, rhs, _) if is_interval(lhs) && is_datetime(rhs) => Some(rhs.clone()), - (lhs, rhs, _) if is_interval(rhs) && is_datetime(lhs) => Some(lhs.clone()), - // if two timestamps are being subtracted, check their time units and return the corresponding interval data type - (lhs, rhs, Operator::Minus) if is_timestamp(lhs) && is_timestamp(rhs) => { - handle_timestamp_minus(lhs, rhs) - } - // if two intervals are being added/subtracted, check their interval units and return the corresponding interval data type - (lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => { - handle_interval_addition(lhs, rhs) - } - (lhs, rhs, Operator::Minus) if is_datetime(lhs) && is_datetime(rhs) => { - temporal_coercion(lhs, rhs) - } - // return None if no coercion is possible - _ => None, - } -} - -// This function checks if two interval data types have the same interval unit and returns an interval data type -// representing the sum of them. If the two interval data types have different units, it returns an interval data type -// with "IntervalUnit::MonthDayNano". If the two interval data types are already "IntervalUnit::YearMonth" or "IntervalUnit::DayTime", -// it returns an interval data type with the same unit as the operands. -fn handle_interval_addition(lhs: &DataType, rhs: &DataType) -> Option { - match (lhs, rhs) { - // operation with the same types - ( - DataType::Interval(IntervalUnit::YearMonth), - DataType::Interval(IntervalUnit::YearMonth), - ) => Some(DataType::Interval(IntervalUnit::YearMonth)), - ( - DataType::Interval(IntervalUnit::DayTime), - DataType::Interval(IntervalUnit::DayTime), - ) => Some(DataType::Interval(IntervalUnit::DayTime)), - // operation with MonthDayNano's or different types - (_, _) => Some(DataType::Interval(IntervalUnit::MonthDayNano)), - } -} - -// This function checks if two timestamp data types have the same time unit and returns an interval data type -// representing the difference between them, either "IntervalUnit::DayTime" if the time unit is second or millisecond, -// or "IntervalUnit::MonthDayNano" if the time unit is microsecond or nanosecond. If the two timestamp data types have -// different time units, it returns an error indicating that "The timestamps have different types". -fn handle_timestamp_minus(lhs: &DataType, rhs: &DataType) -> Option { - match (lhs, rhs) { - ( - DataType::Timestamp(TimeUnit::Second, _), - DataType::Timestamp(TimeUnit::Second, _), - ) - | ( - DataType::Timestamp(TimeUnit::Millisecond, _), - DataType::Timestamp(TimeUnit::Millisecond, _), - ) => Some(DataType::Interval(IntervalUnit::DayTime)), - ( - DataType::Timestamp(TimeUnit::Microsecond, _), - DataType::Timestamp(TimeUnit::Microsecond, _), - ) - | ( - DataType::Timestamp(TimeUnit::Nanosecond, _), - DataType::Timestamp(TimeUnit::Nanosecond, _), - ) => Some(DataType::Interval(IntervalUnit::MonthDayNano)), - (_, _) => None, - } -} - /// Returns the output type of applying numeric operations such as `=` /// to arguments `lhs_type` and `rhs_type` if one is numeric and one /// is `Utf8`/`LargeUtf8`. @@ -777,9 +753,15 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool { /// Coercion rules for Temporal columns: the type that both lhs and rhs can be /// casted to for the purpose of a date computation +/// For interval arithmetic, it doesn't handle datetime type +/- interval fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; + use arrow::datatypes::IntervalUnit::*; match (lhs_type, rhs_type) { + // interval +/- + (Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)), + (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)), + (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)), (Date64, Date32) | (Date32, Date64) => Some(Date64), (Utf8, Date32) | (Date32, Utf8) => Some(Date32), (Utf8, Date64) | (Date64, Utf8) => Some(Date64), @@ -1055,14 +1037,12 @@ mod tests { #[test] fn test_date_timestamp_arithmetic_error() -> Result<()> { - let err = coerce_types( + let common_type = coerce_types( &DataType::Timestamp(TimeUnit::Nanosecond, None), &Operator::Minus, &DataType::Timestamp(TimeUnit::Millisecond, None), - ) - .unwrap_err() - .to_string(); - assert_contains!(&err, "Timestamp(Nanosecond, None) - Timestamp(Millisecond, None) can't be evaluated because there isn't a common type to coerce the types to"); + )?; + assert_eq!(common_type.to_string(), "Timestamp(Millisecond, None)"); let err = coerce_types(&DataType::Date32, &Operator::Plus, &DataType::Date64) .unwrap_err()