Skip to content

Commit

Permalink
ESQL: Disable pushdown of WHERE past STATS (#115308) (#115419)
Browse files Browse the repository at this point in the history
Fix #115281

Let's disable the faulty optimization for now and re-introduce it later,
correctly.
  • Loading branch information
alex-spies authored Oct 23, 2024
1 parent 7c69293 commit 81bb57b
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 29 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/115308.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115308
summary: "ESQL: Disable pushdown of WHERE past STATS"
area: ES|QL
type: bug
issues:
- 115281
Original file line number Diff line number Diff line change
Expand Up @@ -2291,6 +2291,33 @@ m:integer |a:double |x:integer
74999 |48249.0 |0
;

statsWithFilterOnGroups
required_capability: fix_filter_pushdown_past_stats
FROM employees
| STATS v = VALUES(emp_no) by job_positions | WHERE job_positions == "Accountant" | MV_EXPAND v | SORT v
;

v:integer | job_positions:keyword
10001 | Accountant
10012 | Accountant
10016 | Accountant
10023 | Accountant
10025 | Accountant
10028 | Accountant
10034 | Accountant
10037 | Accountant
10044 | Accountant
10045 | Accountant
10050 | Accountant
10051 | Accountant
10066 | Accountant
10081 | Accountant
10085 | Accountant
10089 | Accountant
10092 | Accountant
10094 | Accountant
;


statsWithFiltering
required_capability: per_agg_filtering
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,12 @@ public enum Cap {
/**
* Support for semantic_text field mapping
*/
SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG);
SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG),
/**
* Fix for an optimization that caused wrong results
* https://github.com/elastic/elasticsearch/issues/115281
*/
FIX_FILTER_PUSHDOWN_PAST_STATS;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Filter;
Expand All @@ -38,18 +36,13 @@ protected LogicalPlan rule(Filter filter) {
LogicalPlan child = filter.child();
Expression condition = filter.condition();

// TODO: Push down past STATS if the filter is only on the groups; but take into account how `STATS ... BY field` handles
// multi-values: It seems to be equivalent to `EVAL field = MV_DEDUPE(field) | MV_EXPAND(field) | STATS ... BY field`, where the
// last `STATS ... BY field` can assume that `field` is single-valued (to be checked more thoroughly).
// https://github.com/elastic/elasticsearch/issues/115311
if (child instanceof Filter f) {
// combine nodes into a single Filter with updated ANDed condition
plan = f.with(Predicates.combineAnd(List.of(f.condition(), condition)));
} else if (child instanceof Aggregate agg) { // TODO: re-evaluate along with multi-value support
// Only push [parts of] a filter past an agg if these/it operates on agg's grouping[s], not output.
plan = maybePushDownPastUnary(
filter,
agg,
e -> e instanceof Attribute && agg.output().contains(e) && agg.groupings().contains(e) == false
|| e instanceof AggregateFunction,
NO_OP
);
} else if (child instanceof Eval eval) {
// Don't push if Filter (still) contains references to Eval's fields.
// Account for simple aliases in the Eval, though - these shouldn't stop us.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,7 @@ public void testMultipleCombineLimits() {
);
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
public void testSelectivelyPushDownFilterPastRefAgg() {
// expected plan: "from test | where emp_no > 1 and emp_no < 3 | stats x = count(1) by emp_no | where x > 7"
LogicalPlan plan = optimizedPlan("""
Expand Down Expand Up @@ -790,6 +791,7 @@ public void testNoPushDownOrFilterPastAgg() {
assertTrue(stats.child() instanceof EsRelation);
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
public void testSelectivePushDownComplexFilterPastAgg() {
// expected plan: from test | emp_no > 0 | stats x = count(1) by emp_no | where emp_no < 3 or x > 9
LogicalPlan plan = optimizedPlan("""
Expand Down Expand Up @@ -1393,13 +1395,15 @@ public void testPushDownLimitThroughMultipleSort_AfterMvExpand2() {
}

/**
* TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311
*
* Expected
* Limit[5[INTEGER]]
* \_Aggregate[[first_name{f}#232],[MAX(salary{f}#233) AS max_s, first_name{f}#232]]
* \_Filter[ISNOTNULL(first_name{f}#232)]
* \_MvExpand[first_name{f}#232]
* \_TopN[[Order[emp_no{f}#231,ASC,LAST]],50[INTEGER]]
* \_EsRelation[employees][emp_no{f}#231, first_name{f}#232, salary{f}#233]
* \_Filter[ISNOTNULL(first_name{r}#23)]
* \_Aggregate[STANDARD,[first_name{r}#23],[MAX(salary{f}#18,true[BOOLEAN]) AS max_s, first_name{r}#23]]
* \_MvExpand[first_name{f}#14,first_name{r}#23]
* \_TopN[[Order[emp_no{f}#13,ASC,LAST]],50[INTEGER]]
* \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..]
*/
public void testDontPushDownLimitPastAggregate_AndMvExpand() {
LogicalPlan plan = optimizedPlan("""
Expand All @@ -1413,25 +1417,27 @@ public void testDontPushDownLimitPastAggregate_AndMvExpand() {
| limit 5""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
assertThat(limit.limit().fold(), equalTo(5));
var agg = as(limit.child(), Aggregate.class);
var filter = as(agg.child(), Filter.class);
var mvExp = as(filter.child(), MvExpand.class);
var agg = as(filter.child(), Aggregate.class);
var mvExp = as(agg.child(), MvExpand.class);
var topN = as(mvExp.child(), TopN.class);
assertThat(topN.limit().fold(), equalTo(50));
assertThat(orderNames(topN), contains("emp_no"));
as(topN.child(), EsRelation.class);
}

/**
* TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311
*
* Expected
* Limit[5[INTEGER]]
* \_Aggregate[[first_name{f}#262],[MAX(salary{f}#263) AS max_s, first_name{f}#262]]
* \_Filter[ISNOTNULL(first_name{f}#262)]
* \_Limit[50[INTEGER]]
* \_MvExpand[first_name{f}#262]
* \_Limit[50[INTEGER]]
* \_EsRelation[employees][emp_no{f}#261, first_name{f}#262, salary{f}#263]
* \_Filter[ISNOTNULL(first_name{r}#22)]
* \_Aggregate[STANDARD,[first_name{r}#22],[MAX(salary{f}#17,true[BOOLEAN]) AS max_s, first_name{r}#22]]
* \_Limit[50[INTEGER]]
* \_MvExpand[first_name{f}#13,first_name{r}#22]
* \_Limit[50[INTEGER]]
* \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..]
*/
public void testPushDown_TheRightLimit_PastMvExpand() {
LogicalPlan plan = optimizedPlan("""
Expand All @@ -1445,9 +1451,9 @@ public void testPushDown_TheRightLimit_PastMvExpand() {

var limit = as(plan, Limit.class);
assertThat(limit.limit().fold(), equalTo(5));
var agg = as(limit.child(), Aggregate.class);
var filter = as(agg.child(), Filter.class);
limit = as(filter.child(), Limit.class);
var filter = as(limit.child(), Filter.class);
var agg = as(filter.child(), Aggregate.class);
limit = as(agg.child(), Limit.class);
assertThat(limit.limit().fold(), equalTo(50));
var mvExp = as(limit.child(), MvExpand.class);
limit = as(mvExp.child(), Limit.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ public void testPushDownLikeRlikeFilter() {

// from ... | where a > 1 | stats count(1) by b | where count(1) >= 3 and b < 2
// => ... | where a > 1 and b < 2 | stats count(1) by b | where count(1) >= 3
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
public void testSelectivelyPushDownFilterPastFunctionAgg() {
EsRelation relation = relation();
GreaterThan conditionA = greaterThanOf(getFieldAttribute("a"), ONE);
Expand Down

0 comments on commit 81bb57b

Please sign in to comment.