From 5e880018d620cc29037e2911abc2a18237be0bf0 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Fri, 1 Mar 2019 23:06:16 +0200 Subject: [PATCH] SQL: Enable accurate hit tracking on demand (#39527) Queries that require counting of all hits (COUNT(*) on implicit group by), now enable accurate hit tracking. Fix #37971 (cherry picked from commit 265b637cf6df08986a890b8b5daf012c2b0c1699) --- .../sql/qa/single_node/CliExplainIT.java | 1 + .../sql/execution/search/SourceGenerator.java | 3 + .../xpack/sql/planner/QueryFolder.java | 23 +++++--- .../querydsl/container/QueryContainer.java | 59 ++++++++++--------- .../search/SourceGeneratorTests.java | 8 +++ .../sql/planner/QueryTranslatorTests.java | 50 ++++++++++++++++ 6 files changed, 107 insertions(+), 37 deletions(-) diff --git a/x-pack/plugin/sql/qa/single-node/src/test/java/org/elasticsearch/xpack/sql/qa/single_node/CliExplainIT.java b/x-pack/plugin/sql/qa/single-node/src/test/java/org/elasticsearch/xpack/sql/qa/single_node/CliExplainIT.java index 5a8f757209ff7..102491a95ddb7 100644 --- a/x-pack/plugin/sql/qa/single-node/src/test/java/org/elasticsearch/xpack/sql/qa/single_node/CliExplainIT.java +++ b/x-pack/plugin/sql/qa/single-node/src/test/java/org/elasticsearch/xpack/sql/qa/single_node/CliExplainIT.java @@ -153,6 +153,7 @@ public void testExplainWithCount() throws IOException { assertThat(readLine(), startsWith(" }")); assertThat(readLine(), startsWith(" }")); assertThat(readLine(), startsWith(" ]")); + assertThat(readLine(), startsWith(" \"track_total_hits\" : 2147483647")); assertThat(readLine(), startsWith("}]")); assertEquals("", readLine()); } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java index 47ffcf697328c..a8876e441f749 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java @@ -169,6 +169,9 @@ private static void optimize(QueryContainer query, SearchSourceBuilder builder) // disable source fetching (only doc values are used) disableSource(builder); } + if (query.shouldTrackHits()) { + builder.trackTotalHits(true); + } } private static void disableSource(SearchSourceBuilder builder) { diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java index ef765e31f3b80..24306b319ba7a 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java @@ -14,7 +14,6 @@ import org.elasticsearch.xpack.sql.expression.Expression; import org.elasticsearch.xpack.sql.expression.Expressions; import org.elasticsearch.xpack.sql.expression.Foldables; -import org.elasticsearch.xpack.sql.expression.Literal; import org.elasticsearch.xpack.sql.expression.NamedExpression; import org.elasticsearch.xpack.sql.expression.Order; import org.elasticsearch.xpack.sql.expression.function.Function; @@ -152,7 +151,8 @@ protected PhysicalPlan rule(ProjectExec project) { queryC.pseudoFunctions(), new AttributeMap<>(processors), queryC.sort(), - queryC.limit()); + queryC.limit(), + queryC.shouldTrackHits()); return new EsQueryExec(exec.source(), exec.index(), project.output(), clone); } return project; @@ -180,7 +180,8 @@ protected PhysicalPlan rule(FilterExec plan) { qContainer.pseudoFunctions(), qContainer.scalarFunctions(), qContainer.sort(), - qContainer.limit()); + qContainer.limit(), + qContainer.shouldTrackHits()); return exec.with(qContainer); } @@ -391,10 +392,16 @@ private Tuple addAggFunction(GroupByKey groupingAg if (f instanceof Count) { Count c = (Count) f; // COUNT(*) or COUNT() - if (c.field() instanceof Literal) { - AggRef ref = groupingAgg == null ? - GlobalCountRef.INSTANCE : - new GroupByRef(groupingAgg.id(), Property.COUNT, null); + if (c.field().foldable()) { + AggRef ref = null; + + if (groupingAgg == null) { + ref = GlobalCountRef.INSTANCE; + // if the count points to the total track hits, enable accurate count retrieval + queryC = queryC.withTrackHits(); + } else { + ref = new GroupByRef(groupingAgg.id(), Property.COUNT, null); + } Map pseudoFunctions = new LinkedHashMap<>(queryC.pseudoFunctions()); pseudoFunctions.put(functionId, groupingAgg); @@ -406,7 +413,7 @@ private Tuple addAggFunction(GroupByKey groupingAg queryC = queryC.with(queryC.aggs().addAgg(leafAgg)); return new Tuple<>(queryC, a); } - // the only variant left - COUNT(DISTINCT) - will be covered by the else branch below + // the only variant left - COUNT(DISTINCT) - will be covered by the else branch below as it maps to an aggregation } AggPathInput aggInput = null; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java index 960b01f17c3f3..0fa2236a6bb5a 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java @@ -26,7 +26,6 @@ import org.elasticsearch.xpack.sql.querydsl.agg.Aggs; import org.elasticsearch.xpack.sql.querydsl.agg.GroupByKey; import org.elasticsearch.xpack.sql.querydsl.agg.LeafAgg; -import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef.Property; import org.elasticsearch.xpack.sql.querydsl.query.BoolQuery; import org.elasticsearch.xpack.sql.querydsl.query.MatchAll; import org.elasticsearch.xpack.sql.querydsl.query.NestedQuery; @@ -81,23 +80,26 @@ public class QueryContainer { private final Set sort; private final int limit; + private final boolean trackHits; // computed private Boolean aggsOnly; private Boolean customSort; public QueryContainer() { - this(null, null, null, null, null, null, null, -1); + this(null, null, null, null, null, null, null, -1, false); } - public QueryContainer(Query query, - Aggs aggs, - List> fields, + public QueryContainer(Query query, + Aggs aggs, + List> fields, AttributeMap aliases, - Map pseudoFunctions, - AttributeMap scalarFunctions, - Set sort, - int limit) { + Map pseudoFunctions, + AttributeMap scalarFunctions, + Set sort, + int limit, + boolean trackHits) { this.query = query; this.aggs = aggs == null ? Aggs.EMPTY : aggs; this.fields = fields == null || fields.isEmpty() ? emptyList() : fields; @@ -106,6 +108,7 @@ public QueryContainer(Query query, this.scalarFunctions = scalarFunctions == null || scalarFunctions.isEmpty() ? AttributeMap.emptyAttributeMap() : scalarFunctions; this.sort = sort == null || sort.isEmpty() ? emptySet() : sort; this.limit = limit; + this.trackHits = trackHits; } /** @@ -230,38 +233,46 @@ public boolean hasColumns() { return fields.size() > 0; } + public boolean shouldTrackHits() { + return trackHits; + } + // // copy methods // public QueryContainer with(Query q) { - return new QueryContainer(q, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit); + return new QueryContainer(q, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit, trackHits); } public QueryContainer withAliases(AttributeMap a) { - return new QueryContainer(query, aggs, fields, a, pseudoFunctions, scalarFunctions, sort, limit); + return new QueryContainer(query, aggs, fields, a, pseudoFunctions, scalarFunctions, sort, limit, trackHits); } public QueryContainer withPseudoFunctions(Map p) { - return new QueryContainer(query, aggs, fields, aliases, p, scalarFunctions, sort, limit); + return new QueryContainer(query, aggs, fields, aliases, p, scalarFunctions, sort, limit, trackHits); } public QueryContainer with(Aggs a) { - return new QueryContainer(query, a, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit); + return new QueryContainer(query, a, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit, trackHits); } public QueryContainer withLimit(int l) { - return l == limit ? this : new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, l); + return l == limit ? this : new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, l, trackHits); + } + + public QueryContainer withTrackHits() { + return trackHits ? this : new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit, true); } public QueryContainer withScalarProcessors(AttributeMap procs) { - return new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, procs, sort, limit); + return new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, procs, sort, limit, trackHits); } public QueryContainer addSort(Sort sortable) { Set sort = new LinkedHashSet<>(this.sort); sort.add(sortable); - return new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit); + return new QueryContainer(query, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit, trackHits); } private String aliasName(Attribute attr) { @@ -287,7 +298,7 @@ private Tuple nestedHitFieldRef(FieldAttribute attr.field().isAggregatable(), attr.parent().name()); nestedRefs.add(nestedFieldRef); - return new Tuple<>(new QueryContainer(q, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit), + return new Tuple<>(new QueryContainer(q, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit, trackHits), nestedFieldRef); } @@ -390,7 +401,7 @@ public QueryContainer addColumn(FieldExtraction ref, Attribute attr) { ExpressionId id = attr instanceof AggregateFunctionAttribute ? ((AggregateFunctionAttribute) attr).innerId() : attr.id(); return new QueryContainer(query, aggs, combine(fields, new Tuple<>(ref, id)), aliases, pseudoFunctions, scalarFunctions, - sort, limit); + sort, limit, trackHits); } public AttributeMap scalarFunctions() { @@ -401,16 +412,6 @@ public AttributeMap scalarFunctions() { // agg methods // - public QueryContainer addAggCount(GroupByKey group, ExpressionId functionId) { - FieldExtraction ref = group == null ? GlobalCountRef.INSTANCE : new GroupByRef(group.id(), Property.COUNT, null); - Map pseudoFunctions = new LinkedHashMap<>(this.pseudoFunctions); - pseudoFunctions.put(functionId.toString(), group); - return new QueryContainer(query, aggs, combine(fields, new Tuple<>(ref, functionId)), - aliases, - pseudoFunctions, - scalarFunctions, sort, limit); - } - public QueryContainer addAgg(String groupId, LeafAgg agg) { return with(aggs.addAgg(agg)); } @@ -465,4 +466,4 @@ public String toString() { throw new RuntimeException("error rendering", e); } } -} +} \ No newline at end of file diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/SourceGeneratorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/SourceGeneratorTests.java index 0c56d7783f8b6..fce24758a3b4d 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/SourceGeneratorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/SourceGeneratorTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESTestCase; @@ -111,6 +112,13 @@ public void testNoSort() { assertEquals(singletonList(fieldSort("_doc").order(SortOrder.ASC)), sourceBuilder.sorts()); } + public void testTrackHits() { + SearchSourceBuilder sourceBuilder = SourceGenerator.sourceBuilder(new QueryContainer().withTrackHits(), null, + randomIntBetween(1, 10)); + assertEquals("Should have tracked hits", Integer.valueOf(SearchContext.TRACK_TOTAL_HITS_ACCURATE), + sourceBuilder.trackTotalHitsUpTo()); + } + public void testNoSortIfAgg() { QueryContainer container = new QueryContainer() .addGroups(singletonList(new GroupByValue("group_id", "group_column"))) diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index d3f58cf91feaa..aa23603e9060d 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -678,4 +678,54 @@ public void testTopHitsAggregationWithTwoArgs() { "{\"date\":{\"order\":\"desc\",\"missing\":\"_last\",\"unmapped_type\":\"date\"}}]}}}}}")); } } + + + public void testGlobalCountInImplicitGroupByForcesTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT COUNT(*) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertTrue("Should be tracking hits", eqe.queryContainer().shouldTrackHits()); + } + + public void testGlobalCountAllInImplicitGroupByForcesTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT COUNT(ALL *) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertTrue("Should be tracking hits", eqe.queryContainer().shouldTrackHits()); + } + + public void testGlobalCountInSpecificGroupByDoesNotForceTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT COUNT(*) FROM test GROUP BY int"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertFalse("Should NOT be tracking hits", eqe.queryContainer().shouldTrackHits()); + } + + public void testFieldAllCountDoesNotTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT COUNT(ALL int) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertFalse("Should NOT be tracking hits", eqe.queryContainer().shouldTrackHits()); + } + + public void testFieldCountDoesNotTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT COUNT(int) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertFalse("Should NOT be tracking hits", eqe.queryContainer().shouldTrackHits()); + } + + public void testDistinctCountDoesNotTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT COUNT(DISTINCT int) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertFalse("Should NOT be tracking hits", eqe.queryContainer().shouldTrackHits()); + } + + public void testNoCountDoesNotTrackHits() throws Exception { + PhysicalPlan p = optimizeAndPlan("SELECT int FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertFalse("Should NOT be tracking hits", eqe.queryContainer().shouldTrackHits()); + } }