-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: don't extract common sub expr in CASE WHEN
clause
#8833
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
use std::collections::{BTreeSet, HashMap}; | ||
use std::sync::Arc; | ||
|
||
use crate::utils::is_volatile_expression; | ||
use crate::{utils, OptimizerConfig, OptimizerRule}; | ||
|
||
use arrow::datatypes::DataType; | ||
|
@@ -29,7 +30,7 @@ use datafusion_common::tree_node::{ | |
use datafusion_common::{ | ||
internal_err, Column, DFField, DFSchema, DFSchemaRef, DataFusionError, Result, | ||
}; | ||
use datafusion_expr::expr::{is_volatile, Alias}; | ||
use datafusion_expr::expr::Alias; | ||
use datafusion_expr::logical_plan::{ | ||
Aggregate, Filter, LogicalPlan, Projection, Sort, Window, | ||
}; | ||
|
@@ -518,7 +519,7 @@ enum ExprMask { | |
} | ||
|
||
impl ExprMask { | ||
fn ignores(&self, expr: &Expr) -> Result<bool> { | ||
fn ignores(&self, expr: &Expr) -> bool { | ||
let is_normal_minus_aggregates = matches!( | ||
expr, | ||
Expr::Literal(..) | ||
|
@@ -529,14 +530,12 @@ impl ExprMask { | |
| Expr::Wildcard { .. } | ||
); | ||
|
||
let is_volatile = is_volatile(expr)?; | ||
|
||
let is_aggr = matches!(expr, Expr::AggregateFunction(..)); | ||
|
||
Ok(match self { | ||
Self::Normal => is_volatile || is_normal_minus_aggregates || is_aggr, | ||
Self::NormalAndAggregates => is_volatile || is_normal_minus_aggregates, | ||
}) | ||
match self { | ||
Self::Normal => is_normal_minus_aggregates || is_aggr, | ||
Self::NormalAndAggregates => is_normal_minus_aggregates, | ||
} | ||
} | ||
} | ||
|
||
|
@@ -614,7 +613,12 @@ impl ExprIdentifierVisitor<'_> { | |
impl TreeNodeVisitor for ExprIdentifierVisitor<'_> { | ||
type N = Expr; | ||
|
||
fn pre_visit(&mut self, _expr: &Expr) -> Result<VisitRecursion> { | ||
fn pre_visit(&mut self, expr: &Expr) -> Result<VisitRecursion> { | ||
// related to https://github.com/apache/arrow-datafusion/issues/8814 | ||
// If the expr contain volatile expression or is a case expression, skip it. | ||
if matches!(expr, Expr::Case(..)) || is_volatile_expression(expr)? { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this problem could exist in other function/expression like It can be tracked as a future ticket. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The inner expressions in these expressions may be short-circuited. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. track in #8874 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I comment in the #8874, Do we have any method for this rule to make sure the new function/expr or other cases we can't find will not bring the same bug |
||
return Ok(VisitRecursion::Skip); | ||
} | ||
self.visit_stack | ||
.push(VisitRecord::EnterMark(self.node_count)); | ||
self.node_count += 1; | ||
|
@@ -628,7 +632,7 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> { | |
|
||
let (idx, sub_expr_desc) = self.pop_enter_mark(); | ||
// skip exprs should not be recognize. | ||
if self.expr_mask.ignores(expr)? { | ||
if self.expr_mask.ignores(expr) { | ||
self.id_array[idx].0 = self.series_number; | ||
let desc = Self::desc_expr(expr); | ||
self.visit_stack.push(VisitRecord::ExprItem(desc)); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ use std::collections::{HashMap, HashSet}; | |
use std::sync::Arc; | ||
|
||
use crate::optimizer::ApplyOrder; | ||
use crate::utils::is_volatile_expression; | ||
use crate::{OptimizerConfig, OptimizerRule}; | ||
|
||
use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; | ||
|
@@ -34,7 +35,7 @@ use datafusion_expr::logical_plan::{ | |
use datafusion_expr::utils::{conjunction, split_conjunction, split_conjunction_owned}; | ||
use datafusion_expr::{ | ||
and, build_join_schema, or, BinaryExpr, Expr, Filter, LogicalPlanBuilder, Operator, | ||
ScalarFunctionDefinition, TableProviderFilterPushDown, Volatility, | ||
ScalarFunctionDefinition, TableProviderFilterPushDown, | ||
}; | ||
|
||
use itertools::Itertools; | ||
|
@@ -739,7 +740,9 @@ impl OptimizerRule for PushDownFilter { | |
|
||
(field.qualified_name(), expr) | ||
}) | ||
.partition(|(_, value)| is_volatile_expression(value)); | ||
.partition(|(_, value)| { | ||
is_volatile_expression(value).unwrap_or(true) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we don't know whether the scalar function is volatile, default set it to a volatile function. |
||
}); | ||
|
||
let mut push_predicates = vec![]; | ||
let mut keep_predicates = vec![]; | ||
|
@@ -1028,38 +1031,6 @@ pub fn replace_cols_by_name( | |
}) | ||
} | ||
|
||
/// check whether the expression is volatile predicates | ||
fn is_volatile_expression(e: &Expr) -> bool { | ||
let mut is_volatile = false; | ||
e.apply(&mut |expr| { | ||
Ok(match expr { | ||
Expr::ScalarFunction(f) => match &f.func_def { | ||
ScalarFunctionDefinition::BuiltIn(fun) | ||
if fun.volatility() == Volatility::Volatile => | ||
{ | ||
is_volatile = true; | ||
VisitRecursion::Stop | ||
} | ||
ScalarFunctionDefinition::UDF(fun) | ||
if fun.signature().volatility == Volatility::Volatile => | ||
{ | ||
is_volatile = true; | ||
VisitRecursion::Stop | ||
} | ||
ScalarFunctionDefinition::Name(_) => { | ||
return internal_err!( | ||
"Function `Expr` with name should be resolved." | ||
); | ||
} | ||
_ => VisitRecursion::Continue, | ||
}, | ||
_ => VisitRecursion::Continue, | ||
}) | ||
}) | ||
.unwrap(); | ||
is_volatile | ||
} | ||
|
||
/// check whether the expression uses the columns in `check_map`. | ||
fn contain(e: &Expr, check_map: &HashMap<String, Expr>) -> bool { | ||
let mut is_contain = false; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,10 @@ | |
//! Collection of utility functions that are leveraged by the query optimizer rules | ||
|
||
use crate::{OptimizerConfig, OptimizerRule}; | ||
use datafusion_common::tree_node::{TreeNode, VisitRecursion}; | ||
use datafusion_common::{Column, DFSchemaRef}; | ||
use datafusion_common::{DFSchema, Result}; | ||
use datafusion_expr::expr::is_volatile; | ||
use datafusion_expr::expr_rewriter::replace_col; | ||
use datafusion_expr::utils as expr_utils; | ||
use datafusion_expr::{logical_plan::LogicalPlan, Expr, Operator}; | ||
|
@@ -92,6 +94,20 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) { | |
trace!("{description}::\n{}\n", plan.display_indent_schema()); | ||
} | ||
|
||
/// check whether the expression is volatile predicates | ||
pub(crate) fn is_volatile_expression(e: &Expr) -> Result<bool> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move |
||
let mut is_volatile_expr = false; | ||
e.apply(&mut |expr| { | ||
Ok(if is_volatile(expr)? { | ||
is_volatile_expr = true; | ||
VisitRecursion::Stop | ||
} else { | ||
VisitRecursion::Continue | ||
}) | ||
})?; | ||
Ok(is_volatile_expr) | ||
} | ||
|
||
/// Splits a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` | ||
/// | ||
/// See [`split_conjunction_owned`] for more details and an example. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1112,3 +1112,22 @@ SELECT abs(x), abs(x) + abs(y) FROM t; | |
|
||
statement ok | ||
DROP TABLE t; | ||
|
||
# related to https://github.com/apache/arrow-datafusion/issues/8814 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍🏻 |
||
statement ok | ||
create table t(x int, y int) as values (1,1), (2,2), (3,3), (0,0), (4,0); | ||
|
||
query II | ||
SELECT | ||
CASE WHEN B.x > 0 THEN A.x / B.x ELSE 0 END AS value1, | ||
CASE WHEN B.x > 0 AND B.y > 0 THEN A.x / B.x ELSE 0 END AS value3 | ||
FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B; | ||
---- | ||
0 0 | ||
0 0 | ||
0 0 | ||
0 0 | ||
0 0 | ||
|
||
statement ok | ||
DROP TABLE t; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check volatile expression is this place is because the previous method only can deal with a single
random()
function, the below query wil return true in main branch