From d174f5f12a7d2f7638dd6ad892926c75651080f1 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 3 Nov 2024 16:59:57 -0600 Subject: [PATCH 1/5] only handle cases that simplify to equalityu comparisons --- .../simplify_expressions/expr_simplifier.rs | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 40be1f85391d..1df5b6eb0e85 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -34,7 +34,7 @@ use datafusion_common::{ use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue}; use datafusion_expr::simplify::ExprSimplifyResult; use datafusion_expr::{ - and, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Like, Operator, Volatility, + and, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Operator, Volatility, WindowFunctionDefinition, }; use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; @@ -1470,19 +1470,27 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { }) => Transformed::yes(simplify_regex_expr(left, op, right)?), // Rules for Like - Expr::Like(Like { - expr, - pattern, - negated, - escape_char: _, - case_insensitive: _, - }) if !is_null(&expr) - && matches!( - pattern.as_ref(), - Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%" - ) => - { - Transformed::yes(lit(!negated)) + Expr::Like(ref like_expr) => { + if let Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) = like_expr.pattern.as_ref() { + if !is_null(&like_expr.expr) && pattern_str == "%" { + Transformed::yes(lit(!like_expr.negated)) + } else if !pattern_str.contains(['%', '_'].as_ref()) { + // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression + Transformed::yes( + Expr::BinaryExpr( + BinaryExpr { + left: like_expr.expr.clone(), + op: if like_expr.negated { NotEq } else { Eq }, + right: like_expr.pattern.clone(), + } + ) + ) + } else { + Transformed::no(expr) + } + } else { + Transformed::no(expr) + } } // a is not null/unknown --> true (if a is not nullable) @@ -3632,6 +3640,16 @@ mod tests { let expr = not_ilike(null, "%"); assert_eq!(simplify(expr), lit_bool_null()); + + // test cases that get converted to equality + let expr = like(col("c1"), "a"); + assert_eq!(simplify(expr), col("c1").eq(lit("a"))); + let expr = not_like(col("c1"), "a"); + assert_eq!(simplify(expr), col("c1").not_eq(lit("a"))); + let expr = like(col("c1"), "a_"); + assert_eq!(simplify(expr), col("c1").like(lit("a_"))); + let expr = not_like(col("c1"), "a_"); + assert_eq!(simplify(expr), col("c1").not_like(lit("a_"))); } #[test] From 354e62c88da64b5cf3011322c38ab95bd1eb1a07 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:03:01 -0600 Subject: [PATCH 2/5] fmt --- .../simplify_expressions/expr_simplifier.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 1df5b6eb0e85..9c27fc99982f 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1471,20 +1471,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { // Rules for Like Expr::Like(ref like_expr) => { - if let Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) = like_expr.pattern.as_ref() { + if let Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) = + like_expr.pattern.as_ref() + { if !is_null(&like_expr.expr) && pattern_str == "%" { Transformed::yes(lit(!like_expr.negated)) } else if !pattern_str.contains(['%', '_'].as_ref()) { // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression - Transformed::yes( - Expr::BinaryExpr( - BinaryExpr { - left: like_expr.expr.clone(), - op: if like_expr.negated { NotEq } else { Eq }, - right: like_expr.pattern.clone(), - } - ) - ) + Transformed::yes(Expr::BinaryExpr(BinaryExpr { + left: like_expr.expr.clone(), + op: if like_expr.negated { NotEq } else { Eq }, + right: like_expr.pattern.clone(), + })) } else { Transformed::no(expr) } From 6c847fa6a84691de6c869f6fd39d8795a78a48b0 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 4 Nov 2024 08:14:24 -0600 Subject: [PATCH 3/5] add test for null pattern --- .../simplify_expressions/expr_simplifier.rs | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 9c27fc99982f..f410ab8190cf 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1478,11 +1478,17 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { Transformed::yes(lit(!like_expr.negated)) } else if !pattern_str.contains(['%', '_'].as_ref()) { // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression - Transformed::yes(Expr::BinaryExpr(BinaryExpr { - left: like_expr.expr.clone(), - op: if like_expr.negated { NotEq } else { Eq }, - right: like_expr.pattern.clone(), - })) + if like_expr.escape_char.is_some() { + // TODO: handle escape characters + // These currently aren't anywhere else in DataFusion + Transformed::no(expr) + } else { + Transformed::yes(Expr::BinaryExpr(BinaryExpr { + left: like_expr.expr.clone(), + op: if like_expr.negated { NotEq } else { Eq }, + right: like_expr.pattern.clone(), + })) + } } else { Transformed::no(expr) } @@ -3648,6 +3654,16 @@ mod tests { assert_eq!(simplify(expr), col("c1").like(lit("a_"))); let expr = not_like(col("c1"), "a_"); assert_eq!(simplify(expr), col("c1").not_like(lit("a_"))); + + // null pattern + let expr = Expr::Like(Like { + negated: false, + expr: Box::new(col("c1")), + pattern: Box::new(Expr::Literal(ScalarValue::Utf8(None))), + escape_char: None, + case_insensitive: false, + }); + assert_eq!(simplify(expr.clone()), expr); } #[test] From 99b54beed11c0a5cd3efc7b2d970edff32091031 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:00:41 -0600 Subject: [PATCH 4/5] remove faulty logic, merge conditions --- .../simplify_expressions/expr_simplifier.rs | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index f410ab8190cf..7f7923a96f41 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1474,21 +1474,17 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { if let Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) = like_expr.pattern.as_ref() { - if !is_null(&like_expr.expr) && pattern_str == "%" { - Transformed::yes(lit(!like_expr.negated)) - } else if !pattern_str.contains(['%', '_'].as_ref()) { + if !pattern_str.contains(['%', '_'].as_ref()) && like_expr.escape_char.is_none() { // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression - if like_expr.escape_char.is_some() { - // TODO: handle escape characters - // These currently aren't anywhere else in DataFusion - Transformed::no(expr) - } else { - Transformed::yes(Expr::BinaryExpr(BinaryExpr { - left: like_expr.expr.clone(), - op: if like_expr.negated { NotEq } else { Eq }, - right: like_expr.pattern.clone(), - })) - } + + // TODO: handle escape characters + // These currently aren't anywhere else in DataFusion + + Transformed::yes(Expr::BinaryExpr(BinaryExpr { + left: like_expr.expr.clone(), + op: if like_expr.negated { NotEq } else { Eq }, + right: like_expr.pattern.clone(), + })) } else { Transformed::no(expr) } From 1f13d0d1a715d768262cc300460bee7ce54756a4 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:00:51 -0600 Subject: [PATCH 5/5] fmt --- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 7f7923a96f41..56aa77595e5f 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1474,9 +1474,11 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { if let Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) = like_expr.pattern.as_ref() { - if !pattern_str.contains(['%', '_'].as_ref()) && like_expr.escape_char.is_none() { + if !pattern_str.contains(['%', '_'].as_ref()) + && like_expr.escape_char.is_none() + { // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression - + // TODO: handle escape characters // These currently aren't anywhere else in DataFusion