-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support to unparse Time scalar value to String #11121
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,13 +15,18 @@ | |
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use arrow::datatypes::{Decimal128Type, Decimal256Type, DecimalType}; | ||
use arrow::util::display::array_value_to_string; | ||
use core::fmt; | ||
use std::{fmt::Display, vec}; | ||
|
||
use arrow::datatypes::{Decimal128Type, Decimal256Type, DecimalType}; | ||
use arrow::util::display::array_value_to_string; | ||
use arrow_array::types::{ | ||
ArrowTemporalType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, | ||
Time64NanosecondType, | ||
}; | ||
use arrow_array::{ | ||
Date32Array, Date64Array, TimestampMillisecondArray, TimestampNanosecondArray, | ||
Date32Array, Date64Array, PrimitiveArray, TimestampMillisecondArray, | ||
TimestampNanosecondArray, | ||
}; | ||
use arrow_schema::DataType; | ||
use sqlparser::ast::Value::SingleQuotedString; | ||
|
@@ -650,6 +655,28 @@ impl Unparser<'_> { | |
} | ||
} | ||
|
||
fn handle_time<T: ArrowTemporalType>(&self, v: &ScalarValue) -> Result<ast::Expr> | ||
where | ||
i64: From<T::Native>, | ||
{ | ||
let time = v | ||
.to_array()? | ||
.as_any() | ||
.downcast_ref::<PrimitiveArray<T>>() | ||
.ok_or(internal_datafusion_err!( | ||
"Failed to downcast type {v:?} to arrow array" | ||
))? | ||
.value_as_time(0) | ||
.ok_or(internal_datafusion_err!("Unable to convert {v:?} to Time"))? | ||
.to_string(); | ||
Ok(ast::Expr::Cast { | ||
kind: ast::CastKind::Cast, | ||
expr: Box::new(ast::Expr::Value(SingleQuotedString(time))), | ||
data_type: ast::DataType::Time(None, TimezoneInfo::None), | ||
format: None, | ||
}) | ||
} | ||
|
||
fn timestamp_string_to_sql(&self, ts: String) -> Result<ast::Expr> { | ||
Ok(ast::Expr::Cast { | ||
kind: ast::CastKind::Cast, | ||
|
@@ -801,23 +828,23 @@ impl Unparser<'_> { | |
} | ||
ScalarValue::Date64(None) => Ok(ast::Expr::Value(ast::Value::Null)), | ||
ScalarValue::Time32Second(Some(_t)) => { | ||
not_impl_err!("Unsupported scalar: {v:?}") | ||
self.handle_time::<Time32SecondType>(v) | ||
} | ||
ScalarValue::Time32Second(None) => Ok(ast::Expr::Value(ast::Value::Null)), | ||
ScalarValue::Time32Millisecond(Some(_t)) => { | ||
not_impl_err!("Unsupported scalar: {v:?}") | ||
self.handle_time::<Time32MillisecondType>(v) | ||
} | ||
ScalarValue::Time32Millisecond(None) => { | ||
Ok(ast::Expr::Value(ast::Value::Null)) | ||
} | ||
ScalarValue::Time64Microsecond(Some(_t)) => { | ||
not_impl_err!("Unsupported scalar: {v:?}") | ||
self.handle_time::<Time64MicrosecondType>(v) | ||
} | ||
ScalarValue::Time64Microsecond(None) => { | ||
Ok(ast::Expr::Value(ast::Value::Null)) | ||
} | ||
ScalarValue::Time64Nanosecond(Some(_t)) => { | ||
not_impl_err!("Unsupported scalar: {v:?}") | ||
self.handle_time::<Time64NanosecondType>(v) | ||
} | ||
ScalarValue::Time64Nanosecond(None) => Ok(ast::Expr::Value(ast::Value::Null)), | ||
ScalarValue::TimestampSecond(Some(_ts), _) => { | ||
|
@@ -1241,6 +1268,22 @@ mod tests { | |
)), | ||
r#"CAST('1970-01-01 08:00:00.000010001 +08:00' AS TIMESTAMP)"#, | ||
), | ||
( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I double checked and the actual types are slightly different (so this code won't round trip) but I can't think of anything better for here > select CAST('00:00:00.010001' AS TIME);
+-------------------------+
| Utf8("00:00:00.010001") |
+-------------------------+
| 00:00:00.010001 |
+-------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds.
> select arrow_typeof(CAST('00:00:00.010001' AS TIME));
+---------------------------------------+
| arrow_typeof(Utf8("00:00:00.010001")) |
+---------------------------------------+
| Time64(Nanosecond) |
+---------------------------------------+
1 row(s) fetched.
Elapsed 0.005 seconds. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the root cause is that we always use nanosecond precision to construct the > select arrow_typeof(CAST('1970-01-01 00:00:00.000010001' AS TIMESTAMP));
+-----------------------------------------------------+
| arrow_typeof(Utf8("1970-01-01 00:00:00.000010001")) |
+-----------------------------------------------------+
| Timestamp(Nanosecond, None) |
+-----------------------------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds.
> select arrow_typeof(CAST('1970-01-01 00:00:10.001' AS TIMESTAMP));
+-----------------------------------------------+
| arrow_typeof(Utf8("1970-01-01 00:00:10.001")) |
+-----------------------------------------------+
| Timestamp(Nanosecond, None) |
+-----------------------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds. I tested some ways to construct a TIME value: > select arrow_typeof(TIME '00:00:01.001');
+------------------------------------+
| arrow_typeof(Utf8("00:00:01.001")) |
+------------------------------------+
| Time64(Nanosecond) |
+------------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds.
> select arrow_typeof(CAST('00:00:01' AS TIME));
+--------------------------------+
| arrow_typeof(Utf8("00:00:01")) |
+--------------------------------+
| Time64(Nanosecond) |
+--------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds. I think if we want to round-trip the result, we should generate the time or timestamp node of AST with the specific precision. However, I found DataFusion doesn't support constructing a time or timestamp value with the specified precision: > select arrow_typeof(TIME(3) '00:00:01.001');
This feature is not implemented: Unsupported SQL type Time(Some(3), None)
> select arrow_typeof(CAST('00:00:01.001' AS TIME(3)));
This feature is not implemented: Unsupported SQL type Time(Some(3), None)
> select arrow_typeof(CAST('00:00:01.001' AS TIME(6)));
This feature is not implemented: Unsupported SQL type Time(Some(6), None)
> select arrow_typeof(TIMESTAMP(3) '1970-01-01 00:00:10.001');
This feature is not implemented: Unsupported SQL type Timestamp(Some(3), None) I think this could be another issue but is required for expression round-trip. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I agree with this analysis. It is not clear to me that fully preserving the type of timestamps / times etc is necessary for Expr --> text. Thus I think we shouldn't make the code more complicated until we have some actual usecase for trying to preserve the time |
||
Expr::Literal(ScalarValue::Time32Second(Some(10001))), | ||
r#"CAST('02:46:41' AS TIME)"#, | ||
), | ||
( | ||
Expr::Literal(ScalarValue::Time32Millisecond(Some(10001))), | ||
r#"CAST('00:00:10.001' AS TIME)"#, | ||
), | ||
( | ||
Expr::Literal(ScalarValue::Time64Microsecond(Some(10001))), | ||
r#"CAST('00:00:00.010001' AS TIME)"#, | ||
), | ||
( | ||
Expr::Literal(ScalarValue::Time64Nanosecond(Some(10001))), | ||
r#"CAST('00:00:00.000010001' AS TIME)"#, | ||
), | ||
(sum(col("a")), r#"sum(a)"#), | ||
( | ||
count_udaf() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can assign the precision here like
It can be converted to String well.