Skip to content

Commit

Permalink
aggregate_statistics should only optimize MIN/MAX when relation is no…
Browse files Browse the repository at this point in the history
…t empty (#8914)

* Fix aggregate_statistics

* Add more test
  • Loading branch information
viirya authored Jan 20, 2024
1 parent ae0f401 commit d0c84cc
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 20 deletions.
62 changes: 42 additions & 20 deletions datafusion/core/src/physical_optimizer/aggregate_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,28 @@ fn take_optimizable_min(
agg_expr: &dyn AggregateExpr,
stats: &Statistics,
) -> Option<(ScalarValue, String)> {
let col_stats = &stats.column_statistics;
if let Some(casted_expr) = agg_expr.as_any().downcast_ref::<expressions::Min>() {
if casted_expr.expressions().len() == 1 {
// TODO optimize with exprs other than Column
if let Some(col_expr) = casted_expr.expressions()[0]
.as_any()
.downcast_ref::<expressions::Column>()
if let Precision::Exact(num_rows) = &stats.num_rows {
if *num_rows > 0 {
let col_stats = &stats.column_statistics;
if let Some(casted_expr) =
agg_expr.as_any().downcast_ref::<expressions::Min>()
{
if let Precision::Exact(val) = &col_stats[col_expr.index()].min_value {
if !val.is_null() {
return Some((val.clone(), casted_expr.name().to_string()));
if casted_expr.expressions().len() == 1 {
// TODO optimize with exprs other than Column
if let Some(col_expr) = casted_expr.expressions()[0]
.as_any()
.downcast_ref::<expressions::Column>()
{
if let Precision::Exact(val) =
&col_stats[col_expr.index()].min_value
{
if !val.is_null() {
return Some((
val.clone(),
casted_expr.name().to_string(),
));
}
}
}
}
}
Expand All @@ -221,17 +232,28 @@ fn take_optimizable_max(
agg_expr: &dyn AggregateExpr,
stats: &Statistics,
) -> Option<(ScalarValue, String)> {
let col_stats = &stats.column_statistics;
if let Some(casted_expr) = agg_expr.as_any().downcast_ref::<expressions::Max>() {
if casted_expr.expressions().len() == 1 {
// TODO optimize with exprs other than Column
if let Some(col_expr) = casted_expr.expressions()[0]
.as_any()
.downcast_ref::<expressions::Column>()
if let Precision::Exact(num_rows) = &stats.num_rows {
if *num_rows > 0 {
let col_stats = &stats.column_statistics;
if let Some(casted_expr) =
agg_expr.as_any().downcast_ref::<expressions::Max>()
{
if let Precision::Exact(val) = &col_stats[col_expr.index()].max_value {
if !val.is_null() {
return Some((val.clone(), casted_expr.name().to_string()));
if casted_expr.expressions().len() == 1 {
// TODO optimize with exprs other than Column
if let Some(col_expr) = casted_expr.expressions()[0]
.as_any()
.downcast_ref::<expressions::Column>()
{
if let Precision::Exact(val) =
&col_stats[col_expr.index()].max_value
{
if !val.is_null() {
return Some((
val.clone(),
casted_expr.name().to_string(),
));
}
}
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3260,3 +3260,37 @@ query I
select count(*) from (select count(*) a, count(*) b from (select 1));
----
1

# rule `aggregate_statistics` should not optimize MIN/MAX to wrong values on empty relation

statement ok
CREATE TABLE empty(col0 INTEGER);

query I
SELECT MIN(col0) FROM empty WHERE col0=1;
----
NULL

query I
SELECT MAX(col0) FROM empty WHERE col0=1;
----
NULL

statement ok
DROP TABLE empty;

statement ok
CREATE TABLE t(col0 INTEGER) as VALUES(2);

query I
SELECT MIN(col0) FROM t WHERE col0=1;
----
NULL

query I
SELECT MAX(col0) FROM t WHERE col0=1;
----
NULL

statement ok
DROP TABLE t;

0 comments on commit d0c84cc

Please sign in to comment.