From 97bee2a123eb0dc5846258ab9905f1d7d8b453ec Mon Sep 17 00:00:00 2001 From: DuRipeng <453243496@qq.com> Date: Fri, 27 May 2022 21:54:37 +0800 Subject: [PATCH] Like, NotLike expressions work with literal `NULL` (#2627) * like / not like work with NULL * add more ut --- datafusion/core/tests/sql/expr.rs | 30 +++++++++++++++++++ .../src/coercion_rule/binary_rule.rs | 1 + .../physical-expr/src/expressions/binary.rs | 16 +++++----- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index 760a02872006..90930f6bb581 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -1346,6 +1346,36 @@ async fn nested_subquery() -> Result<()> { Ok(()) } +#[tokio::test] +async fn like_nlike_with_null_lt() { + let ctx = SessionContext::new(); + let sql = "SELECT column1 like NULL as col_null, NULL like column1 as null_col from (values('a'), ('b'), (NULL)) as t"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------+----------+", + "| col_null | null_col |", + "+----------+----------+", + "|          |          |", + "|          |          |", + "|          |          |", + "+----------+----------+", + ]; + assert_batches_eq!(expected, &actual); + + let sql = "SELECT column1 not like NULL as col_null, NULL not like column1 as null_col from (values('a'), ('b'), (NULL)) as t"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------+----------+", + "| col_null | null_col |", + "+----------+----------+", + "|          |          |", + "|          |          |", + "|          |          |", + "+----------+----------+", + ]; + assert_batches_eq!(expected, &actual); +} + #[tokio::test] async fn comparisons_with_null_lt() { let ctx = SessionContext::new(); diff --git a/datafusion/physical-expr/src/coercion_rule/binary_rule.rs b/datafusion/physical-expr/src/coercion_rule/binary_rule.rs index 50ed20425d2c..fd3684ba9ba8 100644 --- a/datafusion/physical-expr/src/coercion_rule/binary_rule.rs +++ 
b/datafusion/physical-expr/src/coercion_rule/binary_rule.rs @@ -450,6 +450,7 @@ fn string_boolean_equality_coercion( fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { string_coercion(lhs_type, rhs_type) .or_else(|| dictionary_coercion(lhs_type, rhs_type)) + .or_else(|| null_coercion(lhs_type, rhs_type)) } /// Coercion rules for Temporal columns: the type that both lhs and rhs can be diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 309ae1cb4b49..068b1823219e 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -821,7 +821,7 @@ macro_rules! compute_utf8_op { /// Invoke a compute kernel on a data array and a scalar value macro_rules! compute_utf8_op_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident, $OP_TYPE:expr) => {{ let ll = $LEFT .as_any() .downcast_ref::<$DT>() @@ -831,6 +831,8 @@ macro_rules! compute_utf8_op_scalar { &ll, &string_value, )?)) + } else if $RIGHT.is_null() { + Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len()))) } else { Err(DataFusionError::Internal(format!( "compute_utf8_op_scalar for '{}' failed to cast literal value {}", @@ -960,9 +962,9 @@ macro_rules! compute_op { } macro_rules! 
binary_string_array_op_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ + ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{ let result: Result<Arc<dyn Array>> = match $LEFT.data_type() { - DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray), + DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray, $OP_TYPE), other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for scalar operation '{}' on string array", other, stringify!($OP) @@ -1364,16 +1366,16 @@ impl BinaryExpr { binary_array_op_dyn_scalar!(array, scalar.clone(), neq, bool_type) } Operator::Like => { - binary_string_array_op_scalar!(array, scalar.clone(), like) + binary_string_array_op_scalar!(array, scalar.clone(), like, bool_type) } Operator::NotLike => { - binary_string_array_op_scalar!(array, scalar.clone(), nlike) + binary_string_array_op_scalar!(array, scalar.clone(), nlike, bool_type) } Operator::ILike => { - binary_string_array_op_scalar!(array, scalar.clone(), ilike) + binary_string_array_op_scalar!(array, scalar.clone(), ilike, bool_type) } Operator::NotILike => { - binary_string_array_op_scalar!(array, scalar.clone(), nilike) + binary_string_array_op_scalar!(array, scalar.clone(), nilike, bool_type) } Operator::Plus => { binary_primitive_array_op_scalar!(array, scalar.clone(), add)