From 806288557c3ddfb085f3b86dfc1f14b17981b2c1 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Tue, 2 Aug 2022 13:49:45 +1000 Subject: [PATCH 1/3] feat: Initial work to add TIME literal value support --- datafusion/common/src/scalar.rs | 32 ++++++++++++++++++++++++++ datafusion/core/tests/sql/timestamp.rs | 20 ++++++++++++++++ datafusion/sql/src/planner.rs | 12 +++++++++- 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 3069a54f491f..dff97d2f981f 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -83,6 +83,8 @@ pub enum ScalarValue { Date32(Option), /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01 Date64(Option), + /// Time stored as a signed 64bit int as nanoseconds since midnight + Time64(Option), /// Timestamp Second TimestampSecond(Option, Option), /// Timestamp Milliseconds @@ -163,6 +165,8 @@ impl PartialEq for ScalarValue { (Date32(_), _) => false, (Date64(v1), Date64(v2)) => v1.eq(v2), (Date64(_), _) => false, + (Time64(v1), Time64(v2)) => v1.eq(v2), + (Time64(_), _) => false, (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2), (TimestampSecond(_, _), _) => false, (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2), @@ -255,6 +259,8 @@ impl PartialOrd for ScalarValue { (Date32(_), _) => None, (Date64(v1), Date64(v2)) => v1.partial_cmp(v2), (Date64(_), _) => None, + (Time64(v1), Time64(v2)) => v1.partial_cmp(v2), + (Time64(_), _) => None, (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2), (TimestampSecond(_, _), _) => None, (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => { @@ -338,6 +344,7 @@ impl std::hash::Hash for ScalarValue { } Date32(v) => v.hash(state), Date64(v) => v.hash(state), + Time64(v) => v.hash(state), TimestampSecond(v, _) => v.hash(state), TimestampMillisecond(v, _) => v.hash(state), TimestampMicrosecond(v, _) => v.hash(state), @@ -681,6 +688,7 @@ impl ScalarValue { ))), ScalarValue::Date32(_) => DataType::Date32, ScalarValue::Date64(_) => DataType::Date64, + ScalarValue::Time64(_) => DataType::Time64(TimeUnit::Nanosecond), ScalarValue::IntervalYearMonth(_) => { DataType::Interval(IntervalUnit::YearMonth) } @@ -741,6 +749,7 @@ impl ScalarValue { ScalarValue::List(v, _) => v.is_none(), ScalarValue::Date32(v) => v.is_none(), ScalarValue::Date64(v) => v.is_none(), + ScalarValue::Time64(v) => v.is_none(), ScalarValue::TimestampSecond(v, _) => v.is_none(), ScalarValue::TimestampMillisecond(v, _) => v.is_none(), ScalarValue::TimestampMicrosecond(v, _) => v.is_none(), @@ -963,6 +972,9 @@ impl ScalarValue { DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary), DataType::Date32 => build_array_primitive!(Date32Array, Date32), DataType::Date64 => build_array_primitive!(Date64Array, Date64), + DataType::Time64(TimeUnit::Nanosecond) => { + build_array_primitive!(Time64NanosecondArray, Time64) + } DataType::Timestamp(TimeUnit::Second, _) => { build_array_primitive_tz!(TimestampSecondArray, TimestampSecond) } @@ -1357,6 +1369,15 @@ impl ScalarValue { ScalarValue::Date64(e) => { build_array_from_option!(Date64, Date64Array, e, size) } + ScalarValue::Time64(e) => { + build_array_from_option!( + Time64, + TimeUnit::Nanosecond, + Time64NanosecondArray, + e, + size + ) + } ScalarValue::IntervalDayTime(e) => build_array_from_option!( Interval, IntervalUnit::DayTime, @@ -1496,6 +1517,9 @@ impl ScalarValue { DataType::Date64 => { typed_cast!(array, index, Date64Array, Date64) } + DataType::Time64(TimeUnit::Nanosecond) => { + typed_cast!(array, index, Time64NanosecondArray, Time64) + } DataType::Timestamp(TimeUnit::Second, tz_opt) => { typed_cast_tz!( array, @@ -1691,6 +1715,9 @@ impl ScalarValue { ScalarValue::Date64(val) => { eq_array_primitive!(array, index, Date64Array, val) } + ScalarValue::Time64(val) => { + eq_array_primitive!(array, index, Time64NanosecondArray, val) + } ScalarValue::TimestampSecond(val, _) => { eq_array_primitive!(array, index, TimestampSecondArray, val) } @@ -1845,6 +1872,7 @@ impl TryFrom for i64 { match value { ScalarValue::Int64(Some(inner_value)) | ScalarValue::Date64(Some(inner_value)) + | ScalarValue::Time64(Some(inner_value)) | ScalarValue::TimestampNanosecond(Some(inner_value), _) | ScalarValue::TimestampMicrosecond(Some(inner_value), _) | ScalarValue::TimestampMillisecond(Some(inner_value), _) @@ -1906,6 +1934,7 @@ impl TryFrom<&DataType> for ScalarValue { DataType::LargeUtf8 => ScalarValue::LargeUtf8(None), DataType::Date32 => ScalarValue::Date32(None), DataType::Date64 => ScalarValue::Date64(None), + DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64(None), DataType::Timestamp(TimeUnit::Second, tz_opt) => { ScalarValue::TimestampSecond(None, tz_opt.clone()) } @@ -2007,6 +2036,7 @@ impl fmt::Display for ScalarValue { }, ScalarValue::Date32(e) => format_option!(f, e)?, ScalarValue::Date64(e) => format_option!(f, e)?, + ScalarValue::Time64(e) => format_option!(f, e)?, ScalarValue::IntervalDayTime(e) => format_option!(f, e)?, ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?, ScalarValue::IntervalMonthDayNano(e) => format_option!(f, e)?, @@ -2067,6 +2097,7 @@ impl fmt::Debug for ScalarValue { ScalarValue::List(_, _) => write!(f, "List([{}])", self), ScalarValue::Date32(_) => write!(f, "Date32(\"{}\")", self), ScalarValue::Date64(_) => write!(f, "Date64(\"{}\")", self), + ScalarValue::Time64(_) => write!(f, "Time64(\"{}\")", self), ScalarValue::IntervalDayTime(_) => { write!(f, "IntervalDayTime(\"{}\")", self) } @@ -2665,6 +2696,7 @@ mod tests { make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary), make_test_case!(i32_vals, Date32Array, Date32), make_test_case!(i64_vals, Date64Array, Date64), + make_test_case!(i64_vals, Time64NanosecondArray, Time64), make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None), make_test_case!( i64_vals, diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 257fa4658af4..551a822821f4 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -985,6 +985,26 @@ async fn sub_interval_day() -> Result<()> { Ok(()) } +#[tokio::test] +async fn cast_string_to_time() -> Result<()> { + let ctx = SessionContext::new(); + + let sql = "select time '08:09:10.123456789' as time;"; + let results = execute_to_batches(&ctx, sql).await; + + let expected = vec![ + "+--------------------+", + "| time |", + "+--------------------+", + "| 08:09:10.123456789 |", + "+--------------------+", + ]; + + assert_batches_eq!(expected, &results); + + Ok(()) +} + #[tokio::test] async fn cast_to_timestamp_twice() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index daa4753e4bfd..6acc402a3791 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -496,7 +496,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Double => Ok(DataType::Float64), SQLDataType::Boolean => Ok(DataType::Boolean), SQLDataType::Date => Ok(DataType::Date32), - SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Millisecond)), + SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Nanosecond)), SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)), _ => Err(DataFusionError::NotImplemented(format!( "The SQL data type {:?} is not implemented", @@ -2561,6 +2561,7 @@ pub fn convert_simple_data_type(sql_type: &SQLDataType) -> Result { | SQLDataType::String => Ok(DataType::Utf8), SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)), SQLDataType::Date => Ok(DataType::Date32), + SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Nanosecond)), SQLDataType::Decimal(precision, scale) => make_decimal_type(*precision, *scale), SQLDataType::Binary(_) => Ok(DataType::Binary), SQLDataType::Bytea => Ok(DataType::Binary), @@ -4365,6 +4366,15 @@ mod tests { quick_test(sql, expected); } + #[test] + fn select_typedstring_time() { + let sql = "SELECT TIME '08:09:10.123' AS time FROM person"; + let expected = + "Projection: CAST(Utf8(\"08:09:10.123\") AS Time64(Nanosecond)) AS time\ + \n TableScan: person"; + quick_test(sql, expected); + } + #[test] fn select_multibyte_column() { let sql = r#"SELECT "😀" FROM person"#; From b7503195871c9ffae3ecc358911b110a82d217fa Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Mon, 15 Aug 2022 16:04:38 +1000 Subject: [PATCH 2/3] chore: Improve unit test coverage --- datafusion/core/tests/sql/timestamp.rs | 36 +++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 551a822821f4..1214dd4ba2e8 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -986,23 +986,41 @@ async fn sub_interval_day() -> Result<()> { } #[tokio::test] -async fn cast_string_to_time() -> Result<()> { +async fn cast_string_to_time() { let ctx = SessionContext::new(); - let sql = "select time '08:09:10.123456789' as time;"; + let sql = "select \ + time '08:09:10.123456789' as time_nano, \ + time '13:14:15.123456' as time_micro,\ + time '13:14:15.123' as time_milli,\ + time '13:14:15' as time;"; let results = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+--------------------+", - "| time |", - "+--------------------+", - "| 08:09:10.123456789 |", - "+--------------------+", + "+--------------------+-----------------+--------------+----------+", + "| time_nano | time_micro | time_milli | time |", + "+--------------------+-----------------+--------------+----------+", + "| 08:09:10.123456789 | 13:14:15.123456 | 13:14:15.123 | 13:14:15 |", + "+--------------------+-----------------+--------------+----------+", ]; - assert_batches_eq!(expected, &results); - Ok(()) + // Fallible cases + + let sql = "SELECT TIME 'not a time' as time;"; + let result = try_execute_to_batches(&ctx, sql).await; + assert_eq!( + result.err().unwrap().to_string(), + "Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64(Nanosecond) type" + ); + + // An invalid time + let sql = "SELECT TIME '24:01:02' as time;"; + let result = try_execute_to_batches(&ctx, sql).await; + assert_eq!( + result.err().unwrap().to_string(), + "Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64(Nanosecond) type" + ); } #[tokio::test] From cc8caadedd3b271168f495a5bffe9a1e9cd44e7e Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Mon, 15 Aug 2022 16:32:39 +1000 Subject: [PATCH 3/3] chore: Improve test names and simplify test --- datafusion/sql/src/planner.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 6acc402a3791..f1d5de4e647e 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -4359,19 +4359,19 @@ mod tests { } #[test] - fn select_typedstring() { - let sql = "SELECT date '2020-12-10' AS date FROM person"; + fn select_typed_date_string() { + let sql = "SELECT date '2020-12-10' AS date"; let expected = "Projection: CAST(Utf8(\"2020-12-10\") AS Date32) AS date\ - \n TableScan: person"; + \n EmptyRelation"; quick_test(sql, expected); } #[test] - fn select_typedstring_time() { - let sql = "SELECT TIME '08:09:10.123' AS time FROM person"; + fn select_typed_time_string() { + let sql = "SELECT TIME '08:09:10.123' AS time"; let expected = "Projection: CAST(Utf8(\"08:09:10.123\") AS Time64(Nanosecond)) AS time\ - \n TableScan: person"; + \n EmptyRelation"; quick_test(sql, expected); }