Skip to content

Commit

Permalink
Use the primary_term field to identify parent documents (#27469)
Browse files Browse the repository at this point in the history
This change stops indexing the `_primary_term` field for nested documents
to allow fast retrieval of parent documents. Today we create a docvalues
field for children to ensure we have a dense data structure on disk. Yet,
since we only use the primary term to tie-break when we see the same
seqID during indexing, having a dense data structure is less important. We can
use this now to improve the performance and memory footprint of nested docs.

Relates to #24362
  • Loading branch information
s1monw authored Nov 21, 2017
1 parent 6319424 commit 5a0b6d1
Show file tree
Hide file tree
Showing 18 changed files with 105 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.TypeFieldMapper;

import java.util.List;
Expand Down Expand Up @@ -62,12 +65,19 @@ public static Query newNestedFilter() {
return new PrefixQuery(new Term(TypeFieldMapper.NAME, new BytesRef("__")));
}

public static Query newNonNestedFilter() {
// TODO: this is slow, make it a positive query
return new BooleanQuery.Builder()
/**
* Creates a new non-nested docs query
* @param indexVersionCreated the index version created since newer indices can identify a parent field more efficiently
*/
public static Query newNonNestedFilter(Version indexVersionCreated) {
if (indexVersionCreated.onOrAfter(Version.V_7_0_0_alpha1)) {
return new DocValuesFieldExistsQuery(SeqNoFieldMapper.PRIMARY_TERM_NAME);
} else {
return new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), Occur.FILTER)
.add(newNestedFilter(), Occur.MUST_NOT)
.build();
}
}

public static BooleanQuery filtered(@Nullable Query query, @Nullable Query filter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ public IndexWarmer.TerminationHandle warmReader(final IndexShard indexShard, fin
}

if (hasNested) {
warmUp.add(Queries.newNonNestedFilter());
warmUp.add(Queries.newNonNestedFilter(indexSettings.getIndexVersionCreated()));
}

final CountDownLatch latch = new CountDownLatch(searcher.reader().leaves().size() * warmUp.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
Expand Down Expand Up @@ -252,11 +253,17 @@ public void postParse(ParseContext context) throws IOException {
// we share the parent docs fields to ensure good compression
SequenceIDFields seqID = context.seqID();
assert seqID != null;
for (int i = 1; i < context.docs().size(); i++) {
int numDocs = context.docs().size();
final Version versionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
final boolean includePrimaryTerm = versionCreated.before(Version.V_7_0_0_alpha1);
for (int i = 1; i < numDocs; i++) {
final Document doc = context.docs().get(i);
doc.add(seqID.seqNo);
doc.add(seqID.seqNoDocValue);
doc.add(seqID.primaryTerm);
if (includePrimaryTerm) {
// primary terms are used to distinguish between parent and nested docs since 6.1.0
doc.add(seqID.primaryTerm);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
.anyMatch(indexType::equals)) {
if (context.getMapperService().hasNested()) {
// type filters are expected not to match nested docs
return Queries.newNonNestedFilter();
return Queries.newNonNestedFilter(context.indexVersionCreated());
} else {
return new MatchAllDocsQuery();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
Query innerQuery;
ObjectMapper objectMapper = context.nestedScope().getObjectMapper();
if (objectMapper == null) {
parentFilter = context.bitsetFilter(Queries.newNonNestedFilter());
parentFilter = context.bitsetFilter(Queries.newNonNestedFilter(context.indexVersionCreated()));
} else {
parentFilter = context.bitsetFilter(objectMapper.nestedTypeFilter());
}
Expand Down Expand Up @@ -377,7 +377,7 @@ public TopDocs[] topDocs(SearchHit[] hits) throws IOException {
SearchHit hit = hits[i];
Query rawParentFilter;
if (parentObjectMapper == null) {
rawParentFilter = Queries.newNonNestedFilter();
rawParentFilter = Queries.newNonNestedFilter(context.indexShard().indexSettings().getIndexVersionCreated());
} else {
rawParentFilter = parentObjectMapper.nestedTypeFilter();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ final class ShardSplittingQuery extends Query {
}
this.indexMetaData = indexMetaData;
this.shardId = shardId;
this.nestedParentBitSetProducer = hasNested ? newParentDocBitSetProducer() : null;
this.nestedParentBitSetProducer = hasNested ? newParentDocBitSetProducer(indexMetaData.getCreationVersion()) : null;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
Expand Down Expand Up @@ -336,9 +336,9 @@ public float matchCost() {
* than once. There is no point in using BitsetFilterCache#BitSetProducerWarmer since we use this only as a delete by query which is
* executed on a recovery-private index writer. There is no point in caching it and it won't have a cache hit either.
*/
private static BitSetProducer newParentDocBitSetProducer() {
private static BitSetProducer newParentDocBitSetProducer(Version indexVersionCreated) {
return context -> {
Query query = Queries.newNonNestedFilter();
Query query = Queries.newNonNestedFilter(indexVersionCreated);
final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
final IndexSearcher searcher = new IndexSearcher(topLevelContext);
searcher.setQueryCache(null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ public Query buildFilteredQuery(Query query) {
&& typeFilter == null // when a _type filter is set, it will automatically exclude nested docs
&& new NestedHelper(mapperService()).mightMatchNestedDocs(query)
&& (aliasFilter == null || new NestedHelper(mapperService()).mightMatchNestedDocs(aliasFilter))) {
filters.add(Queries.newNonNestedFilter());
filters.add(Queries.newNonNestedFilter(mapperService().getIndexSettings().getIndexVersionCreated()));
}

if (aliasFilter != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class NestedAggregator extends BucketsAggregator implements SingleBucketAggregat
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData,
boolean collectsFromSingleBucket) throws IOException {
super(name, factories, context, parentAggregator, pipelineAggregators, metaData);
Query parentFilter = parentObjectMapper != null ? parentObjectMapper.nestedTypeFilter() : Queries.newNonNestedFilter();

Query parentFilter = parentObjectMapper != null ? parentObjectMapper.nestedTypeFilter()
: Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
this.parentFilter = context.bitsetFilterCache().getBitSetProducer(parentFilter);
this.childFilter = childObjectMapper.nestedTypeFilter();
this.collectsFromSingleBucket = collectsFromSingleBucket;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public ReverseNestedAggregator(String name, AggregatorFactories factories, Objec
throws IOException {
super(name, factories, context, parent, pipelineAggregators, metaData);
if (objectMapper == null) {
parentFilter = Queries.newNonNestedFilter();
parentFilter = Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
} else {
parentFilter = objectMapper.nestedTypeFilter();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ public void execute(SearchContext context) {

private int findRootDocumentIfNested(SearchContext context, LeafReaderContext subReaderContext, int subDocId) throws IOException {
if (context.mapperService().hasNested()) {
BitSet bits = context.bitsetFilterCache().getBitSetProducer(Queries.newNonNestedFilter()).getBitSet(subReaderContext);
BitSet bits = context.bitsetFilterCache()
.getBitSetProducer(Queries.newNonNestedFilter(context.indexShard().indexSettings().getIndexVersionCreated()))
.getBitSet(subReaderContext);
if (!bits.get(subDocId)) {
return bits.nextSetBit(subDocId);
}
Expand Down Expand Up @@ -345,7 +347,7 @@ private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context
}
parentFilter = nestedParentObjectMapper.nestedTypeFilter();
} else {
parentFilter = Queries.newNonNestedFilter();
parentFilter = Queries.newNonNestedFilter(context.indexShard().indexSettings().getIndexVersionCreated());
}

Query childFilter = nestedObjectMapper.nestedTypeFilter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ private static Nested resolveNested(QueryShardContext context, NestedSortBuilder
Query parentQuery;
ObjectMapper objectMapper = context.nestedScope().getObjectMapper();
if (objectMapper == null) {
parentQuery = Queries.newNonNestedFilter();
parentQuery = Queries.newNonNestedFilter(context.indexVersionCreated());
} else {
parentQuery = objectMapper.nestedTypeFilter();
}
Expand Down
19 changes: 16 additions & 3 deletions core/src/test/java/org/apache/lucene/search/QueriesTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,28 @@

package org.apache.lucene.search;

import org.elasticsearch.Version;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.VersionUtils;

public class QueriesTests extends ESTestCase {

public void testNonNestedQuery() {
// This is a custom query that extends AutomatonQuery and want to make sure the equals method works
assertEquals(Queries.newNonNestedFilter(), Queries.newNonNestedFilter());
assertEquals(Queries.newNonNestedFilter().hashCode(), Queries.newNonNestedFilter().hashCode());
for (Version version : VersionUtils.allVersions()) {
// This is a custom query that extends AutomatonQuery and want to make sure the equals method works
assertEquals(Queries.newNonNestedFilter(version), Queries.newNonNestedFilter(version));
assertEquals(Queries.newNonNestedFilter(version).hashCode(), Queries.newNonNestedFilter(version).hashCode());
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
assertEquals(Queries.newNonNestedFilter(version), new DocValuesFieldExistsQuery(SeqNoFieldMapper.PRIMARY_TERM_NAME));
} else {
assertEquals(Queries.newNonNestedFilter(version), new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT)
.build());
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.test.VersionUtils;
import org.mockito.Mockito;

import java.io.IOException;
Expand All @@ -58,14 +59,16 @@ protected MappedFieldType createDefaultFieldType() {

public void testTermsQueryWhenTypesAreDisabled() throws Exception {
QueryShardContext context = Mockito.mock(QueryShardContext.class);
Version indexVersionCreated = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.CURRENT);
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_VERSION_CREATED, indexVersionCreated)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();
IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build();
IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY);
Mockito.when(context.getIndexSettings()).thenReturn(mockSettings);
Mockito.when(context.indexVersionCreated()).thenReturn(indexVersionCreated);

MapperService mapperService = Mockito.mock(MapperService.class);
Set<String> types = Collections.emptySet();
Expand All @@ -84,7 +87,7 @@ public void testTermsQueryWhenTypesAreDisabled() throws Exception {

Mockito.when(mapperService.hasNested()).thenReturn(true);
query = ft.termQuery("my_type", context);
assertEquals(Queries.newNonNestedFilter(), query);
assertEquals(Queries.newNonNestedFilter(context.indexVersionCreated()), query);

types = Collections.singleton("other_type");
Mockito.when(mapperService.types()).thenReturn(types);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.RoutingFieldMapper;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.TypeFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.test.ESTestCase;
Expand All @@ -51,6 +52,7 @@
public class ShardSplittingQueryTests extends ESTestCase {

public void testSplitOnID() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand All @@ -76,13 +78,15 @@ public void testSplitOnID() throws IOException {
}
docs.add(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
writer.addDocuments(docs);
} else {
writer.addDocument(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
}
}
Expand All @@ -95,6 +99,7 @@ public void testSplitOnID() throws IOException {
}

public void testSplitOnRouting() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand Down Expand Up @@ -122,14 +127,16 @@ public void testSplitOnRouting() throws IOException {
docs.add(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
writer.addDocuments(docs);
} else {
writer.addDocument(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
}
}
Expand All @@ -140,6 +147,7 @@ public void testSplitOnRouting() throws IOException {
}

public void testSplitOnIdOrRouting() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand All @@ -160,13 +168,15 @@ public void testSplitOnIdOrRouting() throws IOException {
rootDoc = Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
);
} else {
shardId = OperationRouting.generateShardId(metaData, Integer.toString(j), null);
rootDoc = Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
);
}

Expand Down Expand Up @@ -194,6 +204,7 @@ public void testSplitOnIdOrRouting() throws IOException {


public void testSplitOnRoutingPartitioned() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand Down Expand Up @@ -223,14 +234,16 @@ public void testSplitOnRoutingPartitioned() throws IOException {
docs.add(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
writer.addDocuments(docs);
} else {
writer.addDocument(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
}
}
Expand Down
Loading

0 comments on commit 5a0b6d1

Please sign in to comment.