From 1844c8041b7f32b7f436b73a86b10aebbcd3c461 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 21:30:28 -0400 Subject: [PATCH] Remove redundant `unalias_nested` calls for creating Filter's (#11340) * Remove uncessary unalias_nested calls when creating Filter * simplify --- datafusion/expr/src/logical_plan/plan.rs | 54 ++++--------------- .../optimizer/src/common_subexpr_eliminate.rs | 11 +--- datafusion/optimizer/src/push_down_filter.rs | 4 +- 3 files changed, 13 insertions(+), 56 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 998b5bdcb60c8..bde9655b8a390 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -41,9 +41,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion_common::tree_node::{ - Transformed, TransformedResult, TreeNode, TreeNodeRecursion, -}; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; use datafusion_common::{ aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence, @@ -645,39 +643,6 @@ impl LogicalPlan { Ok(LogicalPlan::Values(Values { schema, values })) } LogicalPlan::Filter(Filter { predicate, input }) => { - // todo: should this logic be moved to Filter::try_new? - - // filter predicates should not contain aliased expressions so we remove any aliases - // before this logic was added we would have aliases within filters such as for - // benchmark q6: - // - // lineitem.l_shipdate >= Date32(\"8766\") - // AND lineitem.l_shipdate < Date32(\"9131\") - // AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount >= - // Decimal128(Some(49999999999999),30,15) - // AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount <= - // Decimal128(Some(69999999999999),30,15) - // AND lineitem.l_quantity < Decimal128(Some(2400),15,2) - - let predicate = predicate - .transform_down(|expr| { - match expr { - Expr::Exists { .. } - | Expr::ScalarSubquery(_) - | Expr::InSubquery(_) => { - // subqueries could contain aliases so we don't recurse into those - Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)) - } - Expr::Alias(_) => Ok(Transformed::new( - expr.unalias(), - true, - TreeNodeRecursion::Jump, - )), - _ => Ok(Transformed::no(expr)), - } - }) - .data()?; - Filter::try_new(predicate, input).map(LogicalPlan::Filter) } LogicalPlan::Repartition(_) => Ok(self), @@ -878,7 +843,7 @@ impl LogicalPlan { } LogicalPlan::Filter { .. } => { assert_eq!(1, expr.len()); - let predicate = expr.pop().unwrap().unalias_nested().data; + let predicate = expr.pop().unwrap(); Filter::try_new(predicate, Arc::new(inputs.swap_remove(0))) .map(LogicalPlan::Filter) @@ -2117,6 +2082,9 @@ pub struct Filter { impl Filter { /// Create a new filter operator. + /// + /// Notes: as Aliases have no effect on the output of a filter operator, + /// they are removed from the predicate expression. pub fn try_new(predicate: Expr, input: Arc) -> Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan @@ -2940,7 +2908,7 @@ mod tests { use crate::logical_plan::table_scan; use crate::{col, exists, in_subquery, lit, placeholder, GroupingSet}; - use datafusion_common::tree_node::TreeNodeVisitor; + use datafusion_common::tree_node::{TransformedResult, TreeNodeVisitor}; use datafusion_common::{not_impl_err, Constraint, ScalarValue}; use crate::test::function_stub::count; @@ -3500,11 +3468,8 @@ digraph { })); let col = schema.field_names()[0].clone(); - let filter = Filter::try_new( - Expr::Column(col.into()).eq(Expr::Literal(ScalarValue::Int32(Some(1)))), - scan, - ) - .unwrap(); + let filter = + Filter::try_new(Expr::Column(col.into()).eq(lit(1i32)), scan).unwrap(); assert!(filter.is_scalar()); } @@ -3522,8 +3487,7 @@ digraph { .build() .unwrap(); - let external_filter = - col("foo").eq(Expr::Literal(ScalarValue::Boolean(Some(true)))); + let external_filter = col("foo").eq(lit(true)); // after transformation, because plan is not the same anymore, // the parent plan is built again with call to LogicalPlan::with_new_inputs -> with_new_exprs diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 4a4933fe9cfdb..e18d8bc91bf60 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -342,16 +342,9 @@ impl CommonSubexprEliminate { let input = unwrap_arc(input); let expr = vec![predicate]; self.try_unary_plan(expr, input, config)? - .transform_data(|(mut new_expr, new_input)| { + .map_data(|(mut new_expr, new_input)| { assert_eq!(new_expr.len(), 1); // passed in vec![predicate] - let new_predicate = new_expr - .pop() - .unwrap() - .unalias_nested() - .update_data(|new_predicate| (new_predicate, new_input)); - Ok(new_predicate) - })? - .map_data(|(new_predicate, new_input)| { + let new_predicate = new_expr.pop().unwrap(); Filter::try_new(new_predicate, Arc::new(new_input)) .map(LogicalPlan::Filter) }) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 1c3186b762b71..0a3bae154bd64 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -761,11 +761,11 @@ impl OptimizerRule for PushDownFilter { // Push down non-unnest filter predicate // Unnest - // Unenst Input (Projection) + // Unnest Input (Projection) // -> rewritten to // Unnest // Filter - // Unenst Input (Projection) + // Unnest Input (Projection) let unnest_input = std::mem::take(&mut unnest.input);