From 81bb57b5280ced9598bbf6e4eb026294b021c7cc Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 23 Oct 2024 16:39:31 +0200 Subject: [PATCH] ESQL: Disable pushdown of WHERE past STATS (#115308) (#115419) Fix https://github.com/elastic/elasticsearch/issues/115281 Let's disable the faulty optimization for now and re-introduce it later, correctly. --- docs/changelog/115308.yaml | 6 +++ .../src/main/resources/stats.csv-spec | 27 +++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +++- .../logical/PushDownAndCombineFilters.java | 15 ++----- .../optimizer/LogicalPlanOptimizerTests.java | 40 +++++++++++-------- .../PushDownAndCombineFiltersTests.java | 1 + 6 files changed, 67 insertions(+), 29 deletions(-) create mode 100644 docs/changelog/115308.yaml diff --git a/docs/changelog/115308.yaml b/docs/changelog/115308.yaml new file mode 100644 index 0000000000000..163f0232a3e58 --- /dev/null +++ b/docs/changelog/115308.yaml @@ -0,0 +1,6 @@ +pr: 115308 +summary: "ESQL: Disable pushdown of WHERE past STATS" +area: ES|QL +type: bug +issues: + - 115281 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 5942130a9ed15..92abf36d9b671 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2291,6 +2291,33 @@ m:integer |a:double |x:integer 74999 |48249.0 |0 ; +statsWithFilterOnGroups +required_capability: fix_filter_pushdown_past_stats +FROM employees +| STATS v = VALUES(emp_no) by job_positions | WHERE job_positions == "Accountant" | MV_EXPAND v | SORT v +; + +v:integer | job_positions:keyword + 10001 | Accountant + 10012 | Accountant + 10016 | Accountant + 10023 | Accountant + 10025 | Accountant + 10028 | Accountant + 10034 | Accountant + 10037 | Accountant + 10044 | Accountant + 10045 | Accountant + 10050 | Accountant + 10051 | Accountant + 10066 | Accountant + 10081 | Accountant + 10085 | Accountant + 10089 | Accountant + 10092 | Accountant + 10094 | Accountant +; + statsWithFiltering required_capability: per_agg_filtering diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 4337a947fd8d7..237d093b381b7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -405,7 +405,12 @@ public enum Cap { /** * Support for semantic_text field mapping */ - SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG); + SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG), + /** + * Fix for an optimization that caused wrong results + * https://github.com/elastic/elasticsearch/issues/115281 + */ + FIX_FILTER_PUSHDOWN_PAST_STATS; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java index ed09d0bc16754..15e49c22a44db 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java @@ -15,8 +15,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates; -import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; -import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Filter; @@ -38,18 +36,13 @@ protected LogicalPlan rule(Filter filter) { LogicalPlan child = filter.child(); Expression condition = filter.condition(); + // TODO: Push down past STATS if the filter is only on the groups; but take into account how `STATS ... BY field` handles + // multi-values: It seems to be equivalent to `EVAL field = MV_DEDUPE(field) | MV_EXPAND(field) | STATS ... BY field`, where the + // last `STATS ... BY field` can assume that `field` is single-valued (to be checked more thoroughly). + // https://github.com/elastic/elasticsearch/issues/115311 if (child instanceof Filter f) { // combine nodes into a single Filter with updated ANDed condition plan = f.with(Predicates.combineAnd(List.of(f.condition(), condition))); - } else if (child instanceof Aggregate agg) { // TODO: re-evaluate along with multi-value support - // Only push [parts of] a filter past an agg if these/it operates on agg's grouping[s], not output. - plan = maybePushDownPastUnary( - filter, - agg, - e -> e instanceof Attribute && agg.output().contains(e) && agg.groupings().contains(e) == false - || e instanceof AggregateFunction, - NO_OP - ); } else if (child instanceof Eval eval) { // Don't push if Filter (still) contains references to Eval's fields. // Account for simple aliases in the Eval, though - these shouldn't stop us. diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 8d7c1997f78e3..ff7675504d6ff 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -738,6 +738,7 @@ public void testMultipleCombineLimits() { ); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311") public void testSelectivelyPushDownFilterPastRefAgg() { // expected plan: "from test | where emp_no > 1 and emp_no < 3 | stats x = count(1) by emp_no | where x > 7" LogicalPlan plan = optimizedPlan(""" @@ -790,6 +791,7 @@ public void testNoPushDownOrFilterPastAgg() { assertTrue(stats.child() instanceof EsRelation); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311") public void testSelectivePushDownComplexFilterPastAgg() { // expected plan: from test | emp_no > 0 | stats x = count(1) by emp_no | where emp_no < 3 or x > 9 LogicalPlan plan = optimizedPlan(""" @@ -1393,13 +1395,15 @@ public void testPushDownLimitThroughMultipleSort_AfterMvExpand2() { } /** + * TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311 + * * Expected * Limit[5[INTEGER]] - * \_Aggregate[[first_name{f}#232],[MAX(salary{f}#233) AS max_s, first_name{f}#232]] - * \_Filter[ISNOTNULL(first_name{f}#232)] - * \_MvExpand[first_name{f}#232] - * \_TopN[[Order[emp_no{f}#231,ASC,LAST]],50[INTEGER]] - * \_EsRelation[employees][emp_no{f}#231, first_name{f}#232, salary{f}#233] + * \_Filter[ISNOTNULL(first_name{r}#23)] + * \_Aggregate[STANDARD,[first_name{r}#23],[MAX(salary{f}#18,true[BOOLEAN]) AS max_s, first_name{r}#23]] + * \_MvExpand[first_name{f}#14,first_name{r}#23] + * \_TopN[[Order[emp_no{f}#13,ASC,LAST]],50[INTEGER]] + * \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] */ public void testDontPushDownLimitPastAggregate_AndMvExpand() { LogicalPlan plan = optimizedPlan(""" @@ -1413,10 +1417,10 @@ public void testDontPushDownLimitPastAggregate_AndMvExpand() { | limit 5"""); var limit = as(plan, Limit.class); + var filter = as(limit.child(), Filter.class); assertThat(limit.limit().fold(), equalTo(5)); - var agg = as(limit.child(), Aggregate.class); - var filter = as(agg.child(), Filter.class); - var mvExp = as(filter.child(), MvExpand.class); + var agg = as(filter.child(), Aggregate.class); + var mvExp = as(agg.child(), MvExpand.class); var topN = as(mvExp.child(), TopN.class); assertThat(topN.limit().fold(), equalTo(50)); assertThat(orderNames(topN), contains("emp_no")); @@ -1424,14 +1428,16 @@ public void testDontPushDownLimitPastAggregate_AndMvExpand() { } /** + * TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311 + * * Expected * Limit[5[INTEGER]] - * \_Aggregate[[first_name{f}#262],[MAX(salary{f}#263) AS max_s, first_name{f}#262]] - * \_Filter[ISNOTNULL(first_name{f}#262)] - * \_Limit[50[INTEGER]] - * \_MvExpand[first_name{f}#262] - * \_Limit[50[INTEGER]] - * \_EsRelation[employees][emp_no{f}#261, first_name{f}#262, salary{f}#263] + * \_Filter[ISNOTNULL(first_name{r}#22)] + * \_Aggregate[STANDARD,[first_name{r}#22],[MAX(salary{f}#17,true[BOOLEAN]) AS max_s, first_name{r}#22]] + * \_Limit[50[INTEGER]] + * \_MvExpand[first_name{f}#13,first_name{r}#22] + * \_Limit[50[INTEGER]] + * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] */ public void testPushDown_TheRightLimit_PastMvExpand() { LogicalPlan plan = optimizedPlan(""" @@ -1445,9 +1451,9 @@ public void testPushDown_TheRightLimit_PastMvExpand() { var limit = as(plan, Limit.class); assertThat(limit.limit().fold(), equalTo(5)); - var agg = as(limit.child(), Aggregate.class); - var filter = as(agg.child(), Filter.class); - limit = as(filter.child(), Limit.class); + var filter = as(limit.child(), Filter.class); + var agg = as(filter.child(), Aggregate.class); + limit = as(agg.child(), Limit.class); assertThat(limit.limit().fold(), equalTo(50)); var mvExp = as(limit.child(), MvExpand.class); limit = as(mvExp.child(), Limit.class); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFiltersTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFiltersTests.java index 49a738f4f4fa3..e159e5ed0bd7d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFiltersTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFiltersTests.java @@ -213,6 +213,7 @@ public void testPushDownLikeRlikeFilter() { // from ... | where a > 1 | stats count(1) by b | where count(1) >= 3 and b < 2 // => ... | where a > 1 and b < 2 | stats count(1) by b | where count(1) >= 3 + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311") public void testSelectivelyPushDownFilterPastFunctionAgg() { EsRelation relation = relation(); GreaterThan conditionA = greaterThanOf(getFieldAttribute("a"), ONE);