diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 8b90fb9e4dfa..8cdf53c26ed3 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -497,6 +497,36 @@ impl Expr { } } + /// Return `IsTrue(Box(self))` + pub fn is_true(self) -> Expr { + Expr::IsTrue(Box::new(self)) + } + + /// Return `IsNotTrue(Box(self))` + pub fn is_not_true(self) -> Expr { + Expr::IsNotTrue(Box::new(self)) + } + + /// Return `IsFalse(Box(self))` + pub fn is_false(self) -> Expr { + Expr::IsFalse(Box::new(self)) + } + + /// Return `IsNotFalse(Box(self))` + pub fn is_not_false(self) -> Expr { + Expr::IsNotFalse(Box::new(self)) + } + + /// Return `IsUnknown(Box(self))` + pub fn is_unknown(self) -> Expr { + Expr::IsUnknown(Box::new(self)) + } + + /// Return `IsNotUnknown(Box(self))` + pub fn is_not_unknown(self) -> Expr { + Expr::IsNotUnknown(Box::new(self)) + } + pub fn try_into_col(&self) -> Result { match self { Expr::Column(it) => Ok(it.clone()), diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 8d89f6f4144e..7b11b630464a 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -282,6 +282,36 @@ pub fn is_null(expr: Expr) -> Expr { Expr::IsNull(Box::new(expr)) } +/// Create is true expression +pub fn is_true(expr: Expr) -> Expr { + Expr::IsTrue(Box::new(expr)) +} + +/// Create is not true expression +pub fn is_not_true(expr: Expr) -> Expr { + Expr::IsNotTrue(Box::new(expr)) +} + +/// Create is false expression +pub fn is_false(expr: Expr) -> Expr { + Expr::IsFalse(Box::new(expr)) +} + +/// Create is not false expression +pub fn is_not_false(expr: Expr) -> Expr { + Expr::IsNotFalse(Box::new(expr)) +} + +/// Create is unknown expression +pub fn is_unknown(expr: Expr) -> Expr { + Expr::IsUnknown(Box::new(expr)) +} + +/// Create is not unknown expression +pub fn is_not_unknown(expr: Expr) -> Expr { + Expr::IsNotUnknown(Box::new(expr)) +} + /// Create an convenience function representing a unary scalar function macro_rules! unary_scalar_expr { ($ENUM:ident, $FUNC:ident, $DOC:expr) => { diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index 9d8e97f36a71..7be327713183 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -25,7 +25,10 @@ use datafusion_expr::binary_rule::{coerce_types, comparison_coercion}; use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion}; use datafusion_expr::type_coercion::data_types; use datafusion_expr::utils::from_plan; -use datafusion_expr::{Expr, LogicalPlan}; +use datafusion_expr::{ + is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, Expr, + LogicalPlan, Operator, +}; use datafusion_expr::{ExprSchemable, Signature}; use datafusion_physical_expr::execution_props::ExecutionProps; use std::sync::Arc; @@ -98,6 +101,81 @@ impl ExprRewriter for TypeCoercionRewriter<'_> { fn mutate(&mut self, expr: Expr) -> Result { match expr { + Expr::IsTrue(expr) => { + let expr = is_true(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsNotTrue(expr) => { + let expr = is_not_true(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsFalse(expr) => { + let expr = is_false(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsNotFalse(expr) => { + let expr = + is_not_false(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let left_type = expr.get_type(&self.schema)?; + let right_type = pattern.get_type(&self.schema)?; + let coerced_type = + coerce_types(&left_type, &Operator::Like, &right_type)?; + let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?); + let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?); + let expr = Expr::Like { + negated, + expr, + pattern, + escape_char, + }; + expr.rewrite(&mut self.const_evaluator) + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let left_type = expr.get_type(&self.schema)?; + let right_type = pattern.get_type(&self.schema)?; + let coerced_type = + coerce_types(&left_type, &Operator::Like, &right_type)?; + let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?); + let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?); + let expr = Expr::ILike { + negated, + expr, + pattern, + escape_char, + }; + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsUnknown(expr) => { + // will convert the binary(expr,IsNotDistinctFrom,lit(Boolean(None)); + let left_type = expr.get_type(&self.schema)?; + let right_type = DataType::Boolean; + let coerced_type = + coerce_types(&left_type, &Operator::IsNotDistinctFrom, &right_type)?; + let expr = is_unknown(expr.cast_to(&coerced_type, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsNotUnknown(expr) => { + // will convert the binary(expr,IsDistinctFrom,lit(Boolean(None)); + let left_type = expr.get_type(&self.schema)?; + let right_type = DataType::Boolean; + let coerced_type = + coerce_types(&left_type, &Operator::IsDistinctFrom, &right_type)?; + let expr = is_not_unknown(expr.cast_to(&coerced_type, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } Expr::BinaryExpr { ref left, op, @@ -136,18 +214,34 @@ impl ExprRewriter for TypeCoercionRewriter<'_> { } => { let expr_type = expr.get_type(&self.schema)?; let low_type = low.get_type(&self.schema)?; - let coerced_type = comparison_coercion(&expr_type, &low_type) + let low_coerced_type = comparison_coercion(&expr_type, &low_type) .ok_or_else(|| { DataFusionError::Internal(format!( "Failed to coerce types {} and {} in BETWEEN expression", expr_type, low_type )) })?; + let high_type = high.get_type(&self.schema)?; + let high_coerced_type = comparison_coercion(&expr_type, &low_type) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to coerce types {} and {} in BETWEEN expression", + expr_type, high_type + )) + })?; + let coercion_type = + comparison_coercion(&low_coerced_type, &high_coerced_type) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to coerce types {} and {} in BETWEEN expression", + expr_type, high_type + )) + })?; let expr = Expr::Between { - expr: Box::new(expr.cast_to(&coerced_type, &self.schema)?), + expr: Box::new(expr.cast_to(&coercion_type, &self.schema)?), negated, - low: Box::new(low.cast_to(&coerced_type, &self.schema)?), - high: Box::new(high.cast_to(&coerced_type, &self.schema)?), + low: Box::new(low.cast_to(&coercion_type, &self.schema)?), + high: Box::new(high.cast_to(&coercion_type, &self.schema)?), }; expr.rewrite(&mut self.const_evaluator) } @@ -203,6 +297,15 @@ impl ExprRewriter for TypeCoercionRewriter<'_> { } } +// Support the `IsTrue` `IsNotTrue` `IsFalse` `IsNotFalse` type coercion. +// The above op will be rewrite to the binary op when creating the physical op. +fn get_casted_expr_for_bool_op(expr: &Expr, schema: &DFSchemaRef) -> Result { + let left_type = expr.get_type(schema)?; + let right_type = DataType::Boolean; + let coerced_type = coerce_types(&left_type, &Operator::IsDistinctFrom, &right_type)?; + expr.clone().cast_to(&coerced_type, schema) +} + /// Attempts to coerce the types of `list_types` to be comparable with the /// `expr_type`. /// Returns the common data type for `expr_type` and `list_types` @@ -440,10 +543,178 @@ mod test { Ok(()) } + #[test] + fn is_bool_for_type_coercion() -> Result<()> { + // is true + let expr = col("a").is_true(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr.clone()], + empty, + None, + )?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS TRUE\n EmptyRelation", + &format!("{:?}", plan) + ); + let empty = empty_with_type(DataType::Int64); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let plan = rule.optimize(&plan, &mut config); + assert!(plan.is_err()); + assert!(plan.unwrap_err().to_string().contains("'Int64 IS DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to coerce the types to")); + + // is not true + let expr = col("a").is_not_true(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS NOT TRUE\n EmptyRelation", + &format!("{:?}", plan) + ); + + // is false + let expr = col("a").is_false(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS FALSE\n EmptyRelation", + &format!("{:?}", plan) + ); + + // is not false + let expr = col("a").is_not_false(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS NOT FALSE\n EmptyRelation", + &format!("{:?}", plan) + ); + Ok(()) + } + + #[test] + fn like_for_type_coercion() -> Result<()> { + // like : utf8 like "abc" + let expr = Box::new(col("a")); + let pattern = Box::new(lit(ScalarValue::Utf8(Some("abc".to_string())))); + let like_expr = Expr::Like { + negated: false, + expr, + pattern, + escape_char: None, + }; + let empty = empty_with_type(DataType::Utf8); + let plan = + LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a LIKE Utf8(\"abc\")\n EmptyRelation", + &format!("{:?}", plan) + ); + + let expr = Box::new(col("a")); + let pattern = Box::new(lit(ScalarValue::Null)); + let like_expr = Expr::Like { + negated: false, + expr, + pattern, + escape_char: None, + }; + let empty = empty_with_type(DataType::Utf8); + let plan = + LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a LIKE Utf8(NULL)\n EmptyRelation", + &format!("{:?}", plan) + ); + + let expr = Box::new(col("a")); + let pattern = Box::new(lit(ScalarValue::Utf8(Some("abc".to_string())))); + let like_expr = Expr::Like { + negated: false, + expr, + pattern, + escape_char: None, + }; + let empty = empty_with_type(DataType::Int64); + let plan = + LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config); + assert!(plan.is_err()); + assert!(plan.unwrap_err().to_string().contains("'Int64 LIKE Utf8' can't be evaluated because there isn't a common type to coerce the types to")); + Ok(()) + } + + #[test] + fn unknown_for_type_coercion() -> Result<()> { + // unknown + let expr = col("a").is_unknown(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr.clone()], + empty, + None, + )?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS UNKNOWN\n EmptyRelation", + &format!("{:?}", plan) + ); + + let empty = empty_with_type(DataType::Utf8); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config); + assert!(plan.is_err()); + assert!(plan.unwrap_err().to_string().contains("'Utf8 IS NOT DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to coerce the types to")); + + // is not unknown + let expr = col("a").is_not_unknown(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS NOT UNKNOWN\n EmptyRelation", + &format!("{:?}", plan) + ); + Ok(()) + } + fn empty() -> Arc { Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, schema: Arc::new(DFSchema::empty()), })) } + + fn empty_with_type(data_type: DataType) -> Arc { + Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { + produce_one_row: false, + schema: Arc::new( + DFSchema::new_with_metadata( + vec![DFField::new(None, "a", data_type, true)], + std::collections::HashMap::new(), + ) + .unwrap(), + ), + })) + } }