Skip to content

Commit

Permalink
support like, unknown for type coercion
Browse files Browse the repository at this point in the history
  • Loading branch information
liukun4515 committed Sep 17, 2022
1 parent f6abe08 commit 67696c2
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 13 deletions.
10 changes: 10 additions & 0 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,16 @@ impl Expr {
Expr::IsNotFalse(Box::new(self))
}

/// Consume this expression and wrap it in an `IS UNKNOWN` test,
/// i.e. return `Expr::IsUnknown(Box::new(self))`.
pub fn is_unknown(self) -> Expr {
    let operand = Box::new(self);
    Expr::IsUnknown(operand)
}

/// Consume this expression and wrap it in an `IS NOT UNKNOWN` test,
/// i.e. return `Expr::IsNotUnknown(Box::new(self))`.
pub fn is_not_unknown(self) -> Expr {
    let operand = Box::new(self);
    Expr::IsNotUnknown(operand)
}

pub fn try_into_col(&self) -> Result<Column> {
match self {
Expr::Column(it) => Ok(it.clone()),
Expand Down
10 changes: 10 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,16 @@ pub fn is_not_false(expr: Expr) -> Expr {
Expr::IsNotFalse(Box::new(expr))
}

/// Build an `IS UNKNOWN` expression around `expr`.
pub fn is_unknown(expr: Expr) -> Expr {
    let operand = Box::new(expr);
    Expr::IsUnknown(operand)
}

/// Build an `IS NOT UNKNOWN` expression around `expr`.
pub fn is_not_unknown(expr: Expr) -> Expr {
    let operand = Box::new(expr);
    Expr::IsNotUnknown(operand)
}

/// Create a convenience function representing a unary scalar function
macro_rules! unary_scalar_expr {
($ENUM:ident, $FUNC:ident, $DOC:expr) => {
Expand Down
202 changes: 189 additions & 13 deletions datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecurs
use datafusion_expr::type_coercion::data_types;
use datafusion_expr::utils::from_plan;
use datafusion_expr::{
is_false, is_not_false, is_not_true, is_true, Expr, LogicalPlan, Operator,
is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, Expr,
LogicalPlan, Operator,
};
use datafusion_expr::{ExprSchemable, Signature};
use datafusion_physical_expr::execution_props::ExecutionProps;
Expand Down Expand Up @@ -101,20 +102,79 @@ impl ExprRewriter for TypeCoercionRewriter<'_> {
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
match expr {
Expr::IsTrue(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_true(result_expr))
let expr = is_true(get_casted_expr_for_bool_op(&expr, &self.schema)?);
expr.rewrite(&mut self.const_evaluator)
}
Expr::IsNotTrue(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_not_true(result_expr))
let expr = is_not_true(get_casted_expr_for_bool_op(&expr, &self.schema)?);
expr.rewrite(&mut self.const_evaluator)
}
Expr::IsFalse(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_false(result_expr))
let expr = is_false(get_casted_expr_for_bool_op(&expr, &self.schema)?);
expr.rewrite(&mut self.const_evaluator)
}
Expr::IsNotFalse(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_not_false(result_expr))
let expr =
is_not_false(get_casted_expr_for_bool_op(&expr, &self.schema)?);
expr.rewrite(&mut self.const_evaluator)
}
Expr::Like {
negated,
expr,
pattern,
escape_char,
} => {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::Like, &right_type)?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::Like {
negated,
expr,
pattern,
escape_char,
};
expr.rewrite(&mut self.const_evaluator)
}
Expr::ILike {
negated,
expr,
pattern,
escape_char,
} => {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::Like, &right_type)?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::ILike {
negated,
expr,
pattern,
escape_char,
};
expr.rewrite(&mut self.const_evaluator)
}
Expr::IsUnknown(expr) => {
// Coerce the operand as if this were `expr IS NOT DISTINCT FROM lit(Boolean(None))`.
let left_type = expr.get_type(&self.schema)?;
let right_type = DataType::Boolean;
let coerced_type =
coerce_types(&left_type, &Operator::IsNotDistinctFrom, &right_type)?;
let expr = is_unknown(expr.cast_to(&coerced_type, &self.schema)?);
expr.rewrite(&mut self.const_evaluator)
}
Expr::IsNotUnknown(expr) => {
// Coerce the operand as if this were `expr IS DISTINCT FROM lit(Boolean(None))`.
let left_type = expr.get_type(&self.schema)?;
let right_type = DataType::Boolean;
let coerced_type =
coerce_types(&left_type, &Operator::IsDistinctFrom, &right_type)?;
let expr = is_not_unknown(expr.cast_to(&coerced_type, &self.schema)?);
expr.rewrite(&mut self.const_evaluator)
}
Expr::BinaryExpr {
ref left,
Expand Down Expand Up @@ -154,18 +214,34 @@ impl ExprRewriter for TypeCoercionRewriter<'_> {
} => {
let expr_type = expr.get_type(&self.schema)?;
let low_type = low.get_type(&self.schema)?;
let coerced_type = comparison_coercion(&expr_type, &low_type)
let low_coerced_type = comparison_coercion(&expr_type, &low_type)
.ok_or_else(|| {
DataFusionError::Internal(format!(
"Failed to coerce types {} and {} in BETWEEN expression",
expr_type, low_type
))
})?;
let high_type = high.get_type(&self.schema)?;
// Coerce the tested expression against the *upper* bound. The lower bound is
// covered separately by `low_coerced_type`; the two results are then unified
// into the single type that all three BETWEEN operands are cast to.
let high_coerced_type = comparison_coercion(&expr_type, &high_type)
    .ok_or_else(|| {
        DataFusionError::Internal(format!(
            "Failed to coerce types {} and {} in BETWEEN expression",
            expr_type, high_type
        ))
    })?;
let coercion_type =
comparison_coercion(&low_coerced_type, &high_coerced_type)
.ok_or_else(|| {
DataFusionError::Internal(format!(
"Failed to coerce types {} and {} in BETWEEN expression",
expr_type, high_type
))
})?;
let expr = Expr::Between {
expr: Box::new(expr.cast_to(&coerced_type, &self.schema)?),
expr: Box::new(expr.cast_to(&coercion_type, &self.schema)?),
negated,
low: Box::new(low.cast_to(&coerced_type, &self.schema)?),
high: Box::new(high.cast_to(&coerced_type, &self.schema)?),
low: Box::new(low.cast_to(&coercion_type, &self.schema)?),
high: Box::new(high.cast_to(&coercion_type, &self.schema)?),
};
expr.rewrite(&mut self.const_evaluator)
}
Expand Down Expand Up @@ -522,6 +598,106 @@ mod test {
Ok(())
}

/// Checks type coercion of `LIKE`: a Utf8 column against a Utf8 literal,
/// against an untyped NULL pattern, and an Int64 column that must be rejected.
#[test]
fn like_for_type_coercion() -> Result<()> {
    // Case 1: Utf8 column LIKE Utf8 literal -- the plan prints without any cast.
    let like_utf8_literal = Expr::Like {
        negated: false,
        expr: Box::new(col("a")),
        pattern: Box::new(lit(ScalarValue::Utf8(Some("abc".to_string())))),
        escape_char: None,
    };
    let utf8_projection = LogicalPlan::Projection(Projection::try_new(
        vec![like_utf8_literal],
        empty_with_type(DataType::Utf8),
        None,
    )?);
    let mut optimizer_config = OptimizerConfig::default();
    let optimized = TypeCoercion::new()
        .optimize(&utf8_projection, &mut optimizer_config)
        .unwrap();
    assert_eq!(
        "Projection: #a LIKE Utf8(\"abc\")\n EmptyRelation",
        &format!("{:?}", optimized)
    );

    // Case 2: Utf8 column LIKE NULL -- the NULL pattern is printed as Utf8(NULL).
    let like_null_pattern = Expr::Like {
        negated: false,
        expr: Box::new(col("a")),
        pattern: Box::new(lit(ScalarValue::Null)),
        escape_char: None,
    };
    let null_projection = LogicalPlan::Projection(Projection::try_new(
        vec![like_null_pattern],
        empty_with_type(DataType::Utf8),
        None,
    )?);
    let mut optimizer_config = OptimizerConfig::default();
    let optimized = TypeCoercion::new()
        .optimize(&null_projection, &mut optimizer_config)
        .unwrap();
    assert_eq!(
        "Projection: #a LIKE Utf8(NULL)\n EmptyRelation",
        &format!("{:?}", optimized)
    );

    // Case 3: Int64 column LIKE Utf8 literal -- no common type, so optimization fails.
    let like_on_int_column = Expr::Like {
        negated: false,
        expr: Box::new(col("a")),
        pattern: Box::new(lit(ScalarValue::Utf8(Some("abc".to_string())))),
        escape_char: None,
    };
    let int_projection = LogicalPlan::Projection(Projection::try_new(
        vec![like_on_int_column],
        empty_with_type(DataType::Int64),
        None,
    )?);
    let mut optimizer_config = OptimizerConfig::default();
    let outcome = TypeCoercion::new().optimize(&int_projection, &mut optimizer_config);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("'Int64 LIKE Utf8' can't be evaluated because there isn't a common type to coerce the types to"));
    Ok(())
}

/// Checks type coercion of `IS UNKNOWN` / `IS NOT UNKNOWN`: Boolean columns are
/// accepted as-is, while a Utf8 column is rejected for `IS UNKNOWN`.
#[test]
fn unknown_for_type_coercion() -> Result<()> {
    // IS UNKNOWN over a Boolean column: the plan prints without any cast.
    let is_unknown_expr = col("a").is_unknown();
    let boolean_projection = LogicalPlan::Projection(Projection::try_new(
        vec![is_unknown_expr.clone()],
        empty_with_type(DataType::Boolean),
        None,
    )?);
    let mut optimizer_config = OptimizerConfig::default();
    let optimized = TypeCoercion::new()
        .optimize(&boolean_projection, &mut optimizer_config)
        .unwrap();
    assert_eq!(
        "Projection: #a IS UNKNOWN\n EmptyRelation",
        &format!("{:?}", optimized)
    );

    // IS UNKNOWN over a Utf8 column: no common type with Boolean, so it must fail.
    let utf8_projection = LogicalPlan::Projection(Projection::try_new(
        vec![is_unknown_expr],
        empty_with_type(DataType::Utf8),
        None,
    )?);
    let mut optimizer_config = OptimizerConfig::default();
    let outcome = TypeCoercion::new().optimize(&utf8_projection, &mut optimizer_config);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("'Utf8 IS NOT DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to coerce the types to"));

    // IS NOT UNKNOWN over a Boolean column: the plan prints without any cast.
    let is_not_unknown_expr = col("a").is_not_unknown();
    let boolean_projection = LogicalPlan::Projection(Projection::try_new(
        vec![is_not_unknown_expr],
        empty_with_type(DataType::Boolean),
        None,
    )?);
    let mut optimizer_config = OptimizerConfig::default();
    let optimized = TypeCoercion::new()
        .optimize(&boolean_projection, &mut optimizer_config)
        .unwrap();
    assert_eq!(
        "Projection: #a IS NOT UNKNOWN\n EmptyRelation",
        &format!("{:?}", optimized)
    );
    Ok(())
}

fn empty() -> Arc<LogicalPlan> {
Arc::new(LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
Expand Down

0 comments on commit 67696c2

Please sign in to comment.