Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: separate get_common_type / get_result_type for temporal type #6250

Merged
merged 1 commit into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions datafusion/core/tests/sqllogictests/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ query error DataFusion error: Error during planning: Unsupported argument types\
SELECT DATE '2023-04-09' - DATE '2023-04-02';

# DATE minus Timestamp
query P
query ?
SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
----
0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs

# Timestamp minus DATE
query P
query ?
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
----
0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs
12 changes: 6 additions & 6 deletions datafusion/core/tests/sqllogictests/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,10 @@ select '1 month'::interval + '1980-01-01T12:00:00'::timestamp;

# Exected error: interval (scalar) - date / timestamp (scalar)

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kind of remember this specific error was explicitly requested at some point (and I tried to improve it in #6230)

The error is now misleading -- the error is not because we can't coerce interval and date to a common time, but that subtraction on interval and date is not supported

But on the other hand, the coercion message is pretty confusing to start with, so maybe we could merge this PR and fix it in another 🤔

select '1 month'::interval - '1980-01-01'::date;

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - '1980-01-01T12:00:00'::timestamp;

# interval (array) + date / timestamp (array)
Expand All @@ -349,10 +349,10 @@ select i + ts from t;
2000-02-01T00:01:00

# expected error interval (array) - date / timestamp (array)
query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to
select i - d from t;

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
select i - ts from t;


Expand All @@ -372,10 +372,10 @@ select '1 month'::interval + ts from t;
2000-03-01T00:00:00

# expected error interval (scalar) - date / timestamp (array)
query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - d from t;

query error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
query error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
select '1 month'::interval - ts from t;

statement ok
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/sqllogictests/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ SELECT ts1 + i FROM foo;
2003-07-12T01:31:15.000123463

# Timestamp + Timestamp => error
statement error DataFusion error: Error during planning: Unsupported argument types\. Can not evaluate Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\)
query error DataFusion error: type_coercion\ncaused by\nInternal error: Unsupported operation Plus between Timestamp\(Nanosecond, None\) and Timestamp\(Nanosecond, None\)\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
SELECT ts1 + ts2
FROM foo;

Expand All @@ -1031,7 +1031,7 @@ SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp;
0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs

# Interval - Timestamp => error
statement error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
statement error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
SELECT i - ts1 from FOO;

statement ok
Expand Down
180 changes: 80 additions & 100 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,70 @@

use arrow::compute::can_cast_types;
use arrow::datatypes::{
DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
};

use datafusion_common::DataFusionError;
use datafusion_common::Result;

use crate::type_coercion::{
is_datetime, is_decimal, is_interval, is_numeric, is_timestamp,
};
use crate::type_coercion::{is_datetime, is_decimal, is_interval, is_numeric};
use crate::Operator;

/// Returns the result type of applying mathematics operations such as
/// `+` to arguments of `lhs_type` and `rhs_type`.
fn mathematics_temporal_result_type(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
use arrow::datatypes::IntervalUnit::*;
use arrow::datatypes::TimeUnit::*;

if !is_interval(lhs_type)
&& !is_interval(rhs_type)
&& !is_datetime(lhs_type)
&& !is_datetime(rhs_type)
{
return None;
};

match (lhs_type, rhs_type) {
// datetime +/- interval
(Interval(_), Timestamp(_, _)) => Some(rhs_type.clone()),
(Timestamp(_, _), Interval(_)) => Some(lhs_type.clone()),
(Interval(_), Date32) => Some(rhs_type.clone()),
(Date32, Interval(_)) => Some(lhs_type.clone()),
(Interval(_), Date64) => Some(rhs_type.clone()),
(Date64, Interval(_)) => Some(lhs_type.clone()),
// interval +/-
(Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)),
(Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
(Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
// timestamp - timestamp
(Timestamp(Second, _), Timestamp(Second, _))
| (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => {
Some(Interval(DayTime))
}
(Timestamp(Microsecond, _), Timestamp(Microsecond, _))
| (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => {
Some(Interval(MonthDayNano))
}
(Timestamp(_, _), Timestamp(_, _)) => None,
// TODO: date minus date
// date - timestamp, timestamp - date
(Date32, Timestamp(_, _))
| (Timestamp(_, _), Date32)
| (Date64, Timestamp(_, _))
| (Timestamp(_, _), Date64) => {
// TODO: make get_result_type must after coerce type.
// if type isn't coerced, we need get common type, and then get result type.
let common_type = temporal_coercion(lhs_type, rhs_type);
common_type.and_then(|t| mathematics_temporal_result_type(&t, &t))
}
_ => None,
}
}

/// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types
pub fn get_result_type(
lhs_type: &DataType,
Expand Down Expand Up @@ -67,12 +120,12 @@ pub fn get_result_type(
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => Some(DataType::Boolean),
Operator::Plus | Operator::Minus
if is_datetime(lhs_type)
|| is_datetime(rhs_type)
|| is_interval(lhs_type)
|| is_interval(rhs_type) =>
if is_datetime(lhs_type) && is_datetime(rhs_type)
|| (is_interval(lhs_type) && is_interval(rhs_type))
|| (is_datetime(lhs_type) && is_interval(rhs_type))
|| (is_interval(lhs_type) && is_datetime(rhs_type)) =>
{
temporal_add_sub_coercion(lhs_type, rhs_type, op)
mathematics_temporal_result_type(lhs_type, rhs_type)
}
// following same with `coerce_types`
Operator::BitwiseAnd
Expand Down Expand Up @@ -126,19 +179,13 @@ pub fn coerce_types(
| Operator::LtEq
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => comparison_coercion(lhs_type, rhs_type),
// interval - timestamp is an erroneous case, cannot coerce a type
Operator::Plus | Operator::Minus
if is_datetime(lhs_type)
|| is_datetime(rhs_type)
|| is_interval(lhs_type)
|| is_interval(rhs_type) =>
if is_interval(lhs_type) && is_interval(rhs_type) =>
{
if is_interval(lhs_type) && is_datetime(rhs_type) && *op == Operator::Minus {
return Err(DataFusionError::Plan(
"interval can't subtract timestamp/date".to_string(),
));
}
Comment on lines -131 to -140
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original code in here exist two motivation

  • get result type
  • get common type

I separate it.

temporal_add_sub_coercion(lhs_type, rhs_type, op)
temporal_coercion(lhs_type, rhs_type)
}
Operator::Minus if is_datetime(lhs_type) && is_datetime(rhs_type) => {
temporal_coercion(lhs_type, rhs_type)
}
// for math expressions, the final value of the coercion is also the return type
// because coercion favours higher information types
Expand Down Expand Up @@ -208,7 +255,10 @@ pub fn math_decimal_coercion(

/// Returns the output type of applying bitwise operations such as
/// `&`, `|`, or `xor`to arguments of `lhs_type` and `rhs_type`.
fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataType> {
pub(crate) fn bitwise_coercion(
left_type: &DataType,
right_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;

if !both_numeric_or_null_and_numeric(left_type, right_type) {
Expand Down Expand Up @@ -260,80 +310,6 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
}

// This function performs temporal coercion between the two input data types and the provided operator.
// It returns None (it will convert a Err outside) if the operands are an unsupported/wrong operation.
// If the coercion is possible, it returns a new data type as Some(DataType).
pub fn temporal_add_sub_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
op: &Operator,
) -> Option<DataType> {
match (lhs_type, rhs_type, op) {
// if an interval is being added/subtracted from a date/timestamp, return the date/timestamp data type
(lhs, rhs, _) if is_interval(lhs) && is_datetime(rhs) => Some(rhs.clone()),
(lhs, rhs, _) if is_interval(rhs) && is_datetime(lhs) => Some(lhs.clone()),
// if two timestamps are being subtracted, check their time units and return the corresponding interval data type
(lhs, rhs, Operator::Minus) if is_timestamp(lhs) && is_timestamp(rhs) => {
handle_timestamp_minus(lhs, rhs)
}
// if two intervals are being added/subtracted, check their interval units and return the corresponding interval data type
(lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => {
handle_interval_addition(lhs, rhs)
}
(lhs, rhs, Operator::Minus) if is_datetime(lhs) && is_datetime(rhs) => {
temporal_coercion(lhs, rhs)
}
// return None if no coercion is possible
_ => None,
}
}

// This function checks if two interval data types have the same interval unit and returns an interval data type
// representing the sum of them. If the two interval data types have different units, it returns an interval data type
// with "IntervalUnit::MonthDayNano". If the two interval data types are already "IntervalUnit::YearMonth" or "IntervalUnit::DayTime",
// it returns an interval data type with the same unit as the operands.
fn handle_interval_addition(lhs: &DataType, rhs: &DataType) -> Option<DataType> {
match (lhs, rhs) {
// operation with the same types
(
DataType::Interval(IntervalUnit::YearMonth),
DataType::Interval(IntervalUnit::YearMonth),
) => Some(DataType::Interval(IntervalUnit::YearMonth)),
(
DataType::Interval(IntervalUnit::DayTime),
DataType::Interval(IntervalUnit::DayTime),
) => Some(DataType::Interval(IntervalUnit::DayTime)),
// operation with MonthDayNano's or different types
(_, _) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
}
}

// This function checks if two timestamp data types have the same time unit and returns an interval data type
// representing the difference between them, either "IntervalUnit::DayTime" if the time unit is second or millisecond,
// or "IntervalUnit::MonthDayNano" if the time unit is microsecond or nanosecond. If the two timestamp data types have
// different time units, it returns an error indicating that "The timestamps have different types".
fn handle_timestamp_minus(lhs: &DataType, rhs: &DataType) -> Option<DataType> {
match (lhs, rhs) {
(
DataType::Timestamp(TimeUnit::Second, _),
DataType::Timestamp(TimeUnit::Second, _),
)
| (
DataType::Timestamp(TimeUnit::Millisecond, _),
DataType::Timestamp(TimeUnit::Millisecond, _),
) => Some(DataType::Interval(IntervalUnit::DayTime)),
(
DataType::Timestamp(TimeUnit::Microsecond, _),
DataType::Timestamp(TimeUnit::Microsecond, _),
)
| (
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
(_, _) => None,
}
}

/// Returns the output type of applying numeric operations such as `=`
/// to arguments `lhs_type` and `rhs_type` if one is numeric and one
/// is `Utf8`/`LargeUtf8`.
Expand Down Expand Up @@ -777,9 +753,15 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool {

/// Coercion rules for Temporal columns: the type that both lhs and rhs can be
/// casted to for the purpose of a date computation
/// For interval arithmetic, it doesn't handle datetime type +/- interval
fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
use arrow::datatypes::IntervalUnit::*;
match (lhs_type, rhs_type) {
// interval +/-
(Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)),
(Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
(Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
(Date64, Date32) | (Date32, Date64) => Some(Date64),
(Utf8, Date32) | (Date32, Utf8) => Some(Date32),
(Utf8, Date64) | (Date64, Utf8) => Some(Date64),
Expand Down Expand Up @@ -1055,14 +1037,12 @@ mod tests {

#[test]
fn test_date_timestamp_arithmetic_error() -> Result<()> {
let err = coerce_types(
let common_type = coerce_types(
&DataType::Timestamp(TimeUnit::Nanosecond, None),
&Operator::Minus,
&DataType::Timestamp(TimeUnit::Millisecond, None),
)
.unwrap_err()
.to_string();
assert_contains!(&err, "Timestamp(Nanosecond, None) - Timestamp(Millisecond, None) can't be evaluated because there isn't a common type to coerce the types to");
)?;
assert_eq!(common_type.to_string(), "Timestamp(Millisecond, None)");

let err = coerce_types(&DataType::Date32, &Operator::Plus, &DataType::Date64)
.unwrap_err()
Expand Down