From ff69a573cb841316c9e1664154200a9ceeb8abf7 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Fri, 19 Jan 2024 19:17:23 -0800 Subject: [PATCH 1/6] Reduce the number of Evals ReplaceMissingFieldWithNull creates Improve ReplaceMissingFieldWithNull to create just one eval for the missing value and have the rest point to it. This reduces the amount of EvalOperators created in the pipeline. Fix #104583 --- .../xpack/esql/EsqlTestUtils.java | 12 ++++- .../optimizer/LocalLogicalPlanOptimizer.java | 33 ++++++++----- .../esql/optimizer/LogicalPlanOptimizer.java | 16 +++++-- .../LocalLogicalPlanOptimizerTests.java | 48 +++++++++++++++++++ 4 files changed, 92 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 9c8d5f420d53b..408f58fb191b5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -148,13 +148,21 @@ public static EnrichResolution emptyPolicyResolution() { return new EnrichResolution(); } + public static SearchStats statsForExistingField(String... names) { + return fieldMatchingExistOrMissing(true, names); + } + public static SearchStats statsForMissingField(String... names) { + return fieldMatchingExistOrMissing(false, names); + } + + private static SearchStats fieldMatchingExistOrMissing(boolean exists, String... names) { return new TestSearchStats() { - private final Set missingFields = Set.of(names); + private final Set fields = Set.of(names); @Override public boolean exists(String field) { - return missingFields.contains(field) == false; + return fields.contains(field) == exists; } }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 48041a2660d42..a02e570fd8b4a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -42,6 +42,7 @@ import org.elasticsearch.xpack.ql.type.DataTypes; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; @@ -129,24 +130,34 @@ private LogicalPlan missingToNull(LogicalPlan plan, SearchStats stats) { else if (plan instanceof Project project) { var projections = project.projections(); List newProjections = new ArrayList<>(projections.size()); - List literals = new ArrayList<>(); + Alias nullLiteral = null; + Attribute literalReference = null; for (NamedExpression projection : projections) { if (projection instanceof FieldAttribute f && stats.exists(f.qualifiedName()) == false) { - var alias = new Alias(f.source(), f.name(), null, Literal.of(f, null), f.id()); - literals.add(alias); - newProjections.add(alias.toAttribute()); - } else { - newProjections.add(projection); + // save the first field as null + if (nullLiteral == null) { + nullLiteral = new Alias(f.source(), f.name(), null, Literal.of(f, null), f.id()); + literalReference = nullLiteral.toAttribute(); + projection = literalReference; + } + // the rest of the missing fields are kept as aliases to the already identified null field + // since avoids creating field copies + else { + projection = new Alias(f.source(), f.name(), f.qualifier(), literalReference, f.id()); + } } + + newProjections.add(projection); } - if (literals.size() > 0) { - plan = new Eval(project.source(), project.child(), literals); + // add the first found field as null + if (nullLiteral != null) { + plan = new Eval(project.source(), project.child(), Collections.singletonList(nullLiteral)); plan = new Project(project.source(), plan, newProjections); - } else { - plan = project; } - } else { + } + // otherwise transform fields in place + else { plan = plan.transformExpressionsOnlyUp( FieldAttribute.class, f -> stats.exists(f.qualifiedName()) ? f : Literal.of(f, null) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 26890d9b3a4a4..b0b5ec37dbcc7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -406,17 +406,25 @@ static class PropagateEvalFoldables extends Rule { @Override public LogicalPlan apply(LogicalPlan plan) { var collectRefs = new AttributeMap(); - // collect aliases + + java.util.function.Function replaceReference = r -> collectRefs.resolve(r, r); + + // collect aliases bottom-up plan.forEachExpressionUp(Alias.class, a -> { var c = a.child(); - if (c.foldable()) { - collectRefs.put(a.toAttribute(), c); + boolean shouldCollect = c.foldable(); + // try to resolve the expression based on an existing foldables + if (shouldCollect == false) { + c = c.transformUp(ReferenceAttribute.class, replaceReference); + shouldCollect = c.foldable(); + } + if (shouldCollect) { + collectRefs.put(a.toAttribute(), Literal.of(c)); } }); if (collectRefs.isEmpty()) { return plan; } - java.util.function.Function replaceReference = r -> collectRefs.resolve(r, r); plan = plan.transformUp(p -> { // Apply the replacement inside Filter and Eval (which shouldn't make a difference) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index ac2426f485fcc..4b01a93b7e709 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.elasticsearch.common.util.Maps; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.analysis.Analyzer; @@ -32,19 +33,24 @@ import org.elasticsearch.xpack.ql.plan.logical.Project; import org.elasticsearch.xpack.ql.type.DataTypes; import org.elasticsearch.xpack.ql.type.EsField; +import org.hamcrest.Matchers; import org.junit.BeforeClass; import java.util.List; +import java.util.Locale; import java.util.Map; +import static java.util.Collections.emptyMap; import static org.elasticsearch.xpack.esql.EsqlTestUtils.L; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_SEARCH_STATS; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.statsForExistingField; import static org.elasticsearch.xpack.esql.EsqlTestUtils.statsForMissingField; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -299,6 +305,48 @@ public void testIsNotNullOnExpression() { var source = as(filter.child(), EsRelation.class); } + public void testSparseDocument() throws Exception { + var query = """ + from large + | keep field00* + | limit 10 + """; + + int size = 256; + Map large = Maps.newLinkedHashMapWithExpectedSize(size); + for (int i = 0; i < size; i++) { + var name = String.format(Locale.ROOT, "field%03d", i); + large.put(name, new EsField(name, DataTypes.INTEGER, emptyMap(), true, false)); + } + + SearchStats searchStats = statsForExistingField("field000", "field001", "field002", "field003", "field004"); + + EsIndex index = new EsIndex("large", large); + IndexResolution getIndexResult = IndexResolution.valid(index); + var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(EsqlTestUtils.TEST_CFG)); + + var analyzer = new Analyzer( + new AnalyzerContext(EsqlTestUtils.TEST_CFG, new EsqlFunctionRegistry(), getIndexResult, EsqlTestUtils.emptyPolicyResolution()), + TEST_VERIFIER + ); + + var analyzed = analyzer.analyze(parser.createStatement(query)); + var optimized = logicalOptimizer.optimize(analyzed); + var localContext = new LocalLogicalOptimizerContext(EsqlTestUtils.TEST_CFG, searchStats); + var plan = new LocalLogicalPlanOptimizer(localContext).localOptimize(optimized); + + var project = as(plan, Project.class); + assertThat(project.projections(), hasSize(10)); + assertThat( + Expressions.names(project.projections()), + contains("field000", "field001", "field002", "field003", "field004", "field005", "field006", "field007", "field008", "field009") + ); + var eval = as(project.child(), Eval.class); + var field = eval.fields().get(0); + assertThat(Expressions.name(field), is("field005")); + assertThat(Alias.unwrap(field).fold(), Matchers.nullValue()); + } + private LocalRelation asEmptyRelation(Object o) { var empty = as(o, LocalRelation.class); assertThat(empty.supplier(), is(LocalSupplier.EMPTY)); From 027212dd6d3aea96e2ab370dd6f575a6f6767076 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Fri, 19 Jan 2024 19:24:33 -0800 Subject: [PATCH 2/6] Update docs/changelog/104586.yaml --- docs/changelog/104586.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/104586.yaml diff --git a/docs/changelog/104586.yaml b/docs/changelog/104586.yaml new file mode 100644 index 0000000000000..db1d01c22eff6 --- /dev/null +++ b/docs/changelog/104586.yaml @@ -0,0 +1,6 @@ +pr: 104586 +summary: Reduce the number of Evals `ReplaceMissingFieldWithNull` creates +area: ES|QL +type: bug +issues: + - 104583 From f8f00d075231ed0467ce02cf3fd2d9929bdcbd82 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Fri, 19 Jan 2024 22:51:35 -0800 Subject: [PATCH 3/6] Fix datatype resolution in Greatest/Least --- .../esql/expression/function/scalar/conditional/Greatest.java | 2 ++ .../esql/expression/function/scalar/conditional/Least.java | 3 +++ 2 files changed, 5 insertions(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java index 02589140e98a0..84b442b4df699 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java @@ -111,6 +111,8 @@ public Object fold() { @Override public ExpressionEvaluator.Factory toEvaluator(Function toEvaluator) { + // force datatype initialization + var dataType = dataType(); ExpressionEvaluator.Factory[] factories = children().stream() .map(e -> toEvaluator.apply(new MvMax(e.source(), e))) .toArray(ExpressionEvaluator.Factory[]::new); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java index 912efcf7b7414..462c71098d169 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java @@ -111,6 +111,9 @@ public Object fold() { @Override public ExpressionEvaluator.Factory toEvaluator(Function toEvaluator) { + // force datatype initialization + var dataType = dataType(); + ExpressionEvaluator.Factory[] factories = children().stream() .map(e -> toEvaluator.apply(new MvMin(e.source(), e))) .toArray(ExpressionEvaluator.Factory[]::new); From 9ea4d963ff65bdf516d178d96b68bde4f630bea6 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Tue, 23 Jan 2024 14:05:30 -0800 Subject: [PATCH 4/6] Preserve type information (one null eval per dataType) Fix subtle error in LocalPhysicalPlanOptimizer test Add more unit tests --- .../xpack/esql/action/EsqlActionIT.java | 36 ++++++++- .../optimizer/LocalLogicalPlanOptimizer.java | 29 ++++---- .../esql/planner/LocalExecutionPlanner.java | 5 ++ .../LocalPhysicalPlanOptimizerTests.java | 74 +++++++++++++++++-- .../TestPhysicalOperationProviders.java | 2 +- 5 files changed, 126 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java index 4ff614daaac85..04e46d8ff5425 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java @@ -39,6 +39,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -1314,7 +1315,7 @@ public void testStatsNestFields() { } } - public void testStatsMissingFields() { + public void testStatsMissingFieldWithStats() { final String node1, node2; if (randomBoolean()) { internalCluster().ensureAtLeastNumDataNodes(2); @@ -1353,6 +1354,39 @@ public void testStatsMissingFields() { } } + public void testStatsMissingFieldKeepApp() { + final String node1, node2; + if (randomBoolean()) { + internalCluster().ensureAtLeastNumDataNodes(2); + node1 = randomDataNode().getName(); + node2 = randomValueOtherThan(node1, () -> randomDataNode().getName()); + } else { + node1 = randomDataNode().getName(); + node2 = randomDataNode().getName(); + } + assertAcked( + client().admin() + .indices() + .prepareCreate("foo-index") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", node1)) + .setMapping("foo_int", "type=integer", "foo_long", "type=long", "foo_float", "type=float", "foo_double", "type=double") + ); + assertAcked( + client().admin() + .indices() + .prepareCreate("bar-index") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", node2)) + .setMapping("bar_int", "type=integer", "bar_long", "type=long", "bar_float", "type=float", "bar_double", "type=double") + ); + String command = String.format(Locale.ROOT, "from foo-index,bar-index"); + try (var resp = run(command)) { + var valuesList = getValuesList(resp); + assertEquals(8, resp.columns().size()); + assertEquals(0, valuesList.size()); + assertEquals(Collections.emptyList(), valuesList); + } + } + public void testCountTextField() { assertAcked(client().admin().indices().prepareCreate("test_count").setMapping("name", "type=text")); int numDocs = between(10, 1000); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index a02e570fd8b4a..f8cb1d20ba9c5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.elasticsearch.common.util.Maps; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; @@ -19,6 +20,7 @@ import org.elasticsearch.xpack.esql.planner.AbstractPhysicalOperationProviders; import org.elasticsearch.xpack.esql.planner.PlannerUtils; import org.elasticsearch.xpack.esql.stats.SearchStats; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.expression.Alias; import org.elasticsearch.xpack.ql.expression.Attribute; import org.elasticsearch.xpack.ql.expression.Expression; @@ -42,8 +44,8 @@ import org.elasticsearch.xpack.ql.type.DataTypes; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Set; import static java.util.Arrays.asList; @@ -130,29 +132,30 @@ private LogicalPlan missingToNull(LogicalPlan plan, SearchStats stats) { else if (plan instanceof Project project) { var projections = project.projections(); List newProjections = new ArrayList<>(projections.size()); - Alias nullLiteral = null; - Attribute literalReference = null; + Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(EsqlDataTypes.types().size()); for (NamedExpression projection : projections) { if (projection instanceof FieldAttribute f && stats.exists(f.qualifiedName()) == false) { - // save the first field as null - if (nullLiteral == null) { - nullLiteral = new Alias(f.source(), f.name(), null, Literal.of(f, null), f.id()); - literalReference = nullLiteral.toAttribute(); - projection = literalReference; + DataType dt = f.dataType(); + Alias nullAlias = nullLiteral.get(f.dataType()); + // save the first field as null (per datatype) + if (nullAlias == null) { + Alias alias = new Alias(f.source(), f.name(), null, Literal.of(f, null), f.id()); + nullLiteral.put(dt, alias); + projection = alias.toAttribute(); } - // the rest of the missing fields are kept as aliases to the already identified null field - // since avoids creating field copies + // otherwise point to it else { - projection = new Alias(f.source(), f.name(), f.qualifier(), literalReference, f.id()); + // since avoids creating field copies + projection = new Alias(f.source(), f.name(), f.qualifier(), nullAlias.toAttribute(), f.id()); } } newProjections.add(projection); } // add the first found field as null - if (nullLiteral != null) { - plan = new Eval(project.source(), project.child(), Collections.singletonList(nullLiteral)); + if (nullLiteral.size() > 0) { + plan = new Eval(project.source(), project.child(), new ArrayList<>(nullLiteral.values())); plan = new Project(project.source(), plan, newProjections); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 5d6efa672956c..8f4dd902a44e4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -628,6 +628,11 @@ public String describe() { Stream.of(sinkOperatorFactory) ).map(Describable::describe).collect(joining("\n\\_", "\\_", "")); } + + @Override + public String toString() { + return describe(); + } } /** diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 2716c4ff5195e..e70495401cca4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.Stat; import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; import org.elasticsearch.xpack.esql.plan.physical.ExchangeExec; +import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; import org.elasticsearch.xpack.esql.plan.physical.LimitExec; import org.elasticsearch.xpack.esql.plan.physical.LocalSourceExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; @@ -44,6 +45,7 @@ import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import org.elasticsearch.xpack.ql.expression.Alias; import org.elasticsearch.xpack.ql.expression.Expressions; +import org.elasticsearch.xpack.ql.expression.ReferenceAttribute; import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry; import org.elasticsearch.xpack.ql.index.EsIndex; import org.elasticsearch.xpack.ql.index.IndexResolution; @@ -65,6 +67,7 @@ import static org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.StatsType; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -374,28 +377,89 @@ public boolean exists(String field) { assertThat(Expressions.names(localSource.output()), contains("count", "seen")); } + /** + * Expects + * LimitExec[500[INTEGER]] + * \_ExchangeExec[[],false] + * \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n + * ame{f}#7, long_noidx{f}#12, salary{f}#8]] + * \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] + * \_EsQueryExec[test], query[{"exists":{"field":"emp_no","boost":1.0}}][_doc{f}#13], limit[500], sort[] estimatedRowSize[324] + */ public void testIsNotNullPushdownFilter() { var plan = plan("from test | where emp_no is not null"); var limit = as(plan, LimitExec.class); var exchange = as(limit.child(), ExchangeExec.class); - var query = as(exchange.child(), EsQueryExec.class); + var project = as(exchange.child(), ProjectExec.class); + var field = as(project.child(), FieldExtractExec.class); + var query = as(field.child(), EsQueryExec.class); assertThat(query.limit().fold(), is(500)); var expected = QueryBuilders.existsQuery("emp_no"); assertThat(query.query().toString(), is(expected.toString())); } + /** + * Expects + * LimitExec[500[INTEGER]] + * \_ExchangeExec[[],false] + * \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n + * ame{f}#7, long_noidx{f}#12, salary{f}#8]] + * \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] + * \_EsQueryExec[test], query[{"bool":{"must_not":[{"exists":{"field":"emp_no","boost":1.0}}],"boost":1.0}}][_doc{f}#13], limit[500], sort[] estimatedRowSize[324] + */ public void testIsNullPushdownFilter() { var plan = plan("from test | where emp_no is null"); var limit = as(plan, LimitExec.class); var exchange = as(limit.child(), ExchangeExec.class); - var query = as(exchange.child(), EsQueryExec.class); + var project = as(exchange.child(), ProjectExec.class); + var field = as(project.child(), FieldExtractExec.class); + var query = as(field.child(), EsQueryExec.class); assertThat(query.limit().fold(), is(500)); var expected = QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("emp_no")); assertThat(query.query().toString(), is(expected.toString())); } + /** + * Expects + * LimitExec[500[INTEGER]] + * \_ExchangeExec[[],false] + * \_ProjectExec[[_meta_field{f}#8, emp_no{r}#2, first_name{r}#3, gender{f}#4, job{f}#9, job.raw{f}#10, languages{f}#5, first_n + * ame{r}#3 AS last_name, long_noidx{f}#11, emp_no{r}#2 AS salary]] + * \_FieldExtractExec[_meta_field{f}#8, gender{f}#4, job{f}#9, job.raw{f}..] + * \_EvalExec[[null[INTEGER] AS emp_no, null[KEYWORD] AS first_name]] + * \_EsQueryExec[test], query[][_doc{f}#12], limit[500], sort[] estimatedRowSize[270] + */ + public void testMissingFieldsDoNotGetExtracted() { + var stats = EsqlTestUtils.statsForMissingField("first_name", "last_name", "emp_no", "salary"); + + var plan = plan("from test", stats); + var limit = as(plan, LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + var projections = project.projections(); + assertThat( + Expressions.names(projections), + contains("_meta_field", "emp_no", "first_name", "gender", "job", "job.raw", "languages", "last_name", "long_noidx", "salary") + ); + // emp_no + assertThat(projections.get(1), instanceOf(ReferenceAttribute.class)); + // first_name + assertThat(projections.get(2), instanceOf(ReferenceAttribute.class)); + + // last_name --> first_name + var nullAlias = Alias.unwrap(projections.get(7)); + assertThat(Expressions.name(nullAlias), is("first_name")); + // salary --> emp_no + nullAlias = Alias.unwrap(projections.get(9)); + assertThat(Expressions.name(nullAlias), is("emp_no")); + // check field extraction is skipped and that evaled fields are not extracted anymore + var field = as(project.child(), FieldExtractExec.class); + var fields = field.attributesToExtract(); + assertThat(Expressions.names(fields), contains("_meta_field", "gender", "job", "job.raw", "languages", "long_noidx")); + } + private QueryBuilder wrapWithSingleQuery(QueryBuilder inner, String fieldName, Source source) { return FilterTests.singleValueQuery(inner, fieldName, source); } @@ -422,15 +486,15 @@ private PhysicalPlan plan(String query, SearchStats stats) { private PhysicalPlan optimizedPlan(PhysicalPlan plan, SearchStats searchStats) { // System.out.println("* Physical Before\n" + plan); - var p = EstimatesRowSize.estimateRowSize(0, physicalPlanOptimizer.optimize(plan)); - // System.out.println("* Physical After\n" + p); + var physicalPlan = EstimatesRowSize.estimateRowSize(0, physicalPlanOptimizer.optimize(plan)); + // System.out.println("* Physical After\n" + physicalPlan); // the real execution breaks the plan at the exchange and then decouples the plan // this is of no use in the unit tests, which checks the plan as a whole instead of each // individually hence why here the plan is kept as is var logicalTestOptimizer = new LocalLogicalPlanOptimizer(new LocalLogicalOptimizerContext(config, searchStats)); var physicalTestOptimizer = new TestLocalPhysicalPlanOptimizer(new LocalPhysicalOptimizerContext(config, searchStats), true); - var l = PlannerUtils.localPlan(plan, logicalTestOptimizer, physicalTestOptimizer); + var l = PlannerUtils.localPlan(physicalPlan, logicalTestOptimizer, physicalTestOptimizer); // handle local reduction alignment l = PhysicalPlanOptimizerTests.localRelationshipAlignment(l); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index 8377530b9fbc2..c7f7496e53c1c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -204,7 +204,7 @@ public Operator get(DriverContext driverContext) { @Override public String describe() { - return "TestFieldExtractOperator"; + return "TestFieldExtractOperator(" + columnName + ")"; } } From 2682ac6583753d81da649e91599a0994d59a6aac Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Tue, 23 Jan 2024 14:12:00 -0800 Subject: [PATCH 5/6] Fix long javadoc --- .../xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index e70495401cca4..cc270d4121712 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -406,7 +406,8 @@ public void testIsNotNullPushdownFilter() { * \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n * ame{f}#7, long_noidx{f}#12, salary{f}#8]] * \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] - * \_EsQueryExec[test], query[{"bool":{"must_not":[{"exists":{"field":"emp_no","boost":1.0}}],"boost":1.0}}][_doc{f}#13], limit[500], sort[] estimatedRowSize[324] + * \_EsQueryExec[test], query[{"bool":{"must_not":[{"exists":{"field":"emp_no","boost":1.0}}],"boost":1.0}}][_doc{f}#13], + * limit[500], sort[] estimatedRowSize[324] */ public void testIsNullPushdownFilter() { var plan = plan("from test | where emp_no is null"); From 9ec8394f8dbb71ecc20b67abb5a6b37aaf4d0d97 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Tue, 23 Jan 2024 15:54:06 -0800 Subject: [PATCH 6/6] Fix describe method --- .../xpack/esql/planner/TestPhysicalOperationProviders.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index 7fb28f9a0c96f..f78b9bcfd5c98 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -203,9 +203,11 @@ public void close() { private class TestFieldExtractOperatorFactory implements Operator.OperatorFactory { final Operator op; + private String columnName; TestFieldExtractOperatorFactory(Attribute attr, MappedFieldType.FieldExtractPreference extractPreference) { this.op = new TestFieldExtractOperator(attr.name(), attr.dataType(), extractPreference); + this.columnName = attr.name(); } @Override