Skip to content

Commit

Permalink
Use the primary_term field to identify parent documents (#27469)
Browse files Browse the repository at this point in the history
This change stops indexing the `_primary_term` field for nested documents
to allow fast retrieval of parent documents. Today we create a docvalues
field for children to ensure we have a dense data structure on disk. Yet,
since we only use the primary term to tie-break when we see the same
seqID during indexing, having a dense data structure is less important. We can
use this now to improve the performance and memory footprint of nested docs.

Relates to #24362
  • Loading branch information
s1monw authored Nov 21, 2017
1 parent 6319424 commit 5a0b6d1
Show file tree
Hide file tree
Showing 18 changed files with 105 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.TypeFieldMapper;

import java.util.List;
Expand Down Expand Up @@ -62,12 +65,19 @@ public static Query newNestedFilter() {
return new PrefixQuery(new Term(TypeFieldMapper.NAME, new BytesRef("__")));
}

public static Query newNonNestedFilter() {
// TODO: this is slow, make it a positive query
return new BooleanQuery.Builder()
/**
* Creates a new non-nested docs query
* @param indexVersionCreated the index version created since newer indices can identify a parent field more efficiently
*/
public static Query newNonNestedFilter(Version indexVersionCreated) {
if (indexVersionCreated.onOrAfter(Version.V_7_0_0_alpha1)) {
return new DocValuesFieldExistsQuery(SeqNoFieldMapper.PRIMARY_TERM_NAME);
} else {
return new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), Occur.FILTER)
.add(newNestedFilter(), Occur.MUST_NOT)
.build();
}
}

public static BooleanQuery filtered(@Nullable Query query, @Nullable Query filter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ public IndexWarmer.TerminationHandle warmReader(final IndexShard indexShard, fin
}

if (hasNested) {
warmUp.add(Queries.newNonNestedFilter());
warmUp.add(Queries.newNonNestedFilter(indexSettings.getIndexVersionCreated()));
}

final CountDownLatch latch = new CountDownLatch(searcher.reader().leaves().size() * warmUp.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
Expand Down Expand Up @@ -252,11 +253,17 @@ public void postParse(ParseContext context) throws IOException {
// we share the parent docs fields to ensure good compression
SequenceIDFields seqID = context.seqID();
assert seqID != null;
for (int i = 1; i < context.docs().size(); i++) {
int numDocs = context.docs().size();
final Version versionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
final boolean includePrimaryTerm = versionCreated.before(Version.V_7_0_0_alpha1);
for (int i = 1; i < numDocs; i++) {
final Document doc = context.docs().get(i);
doc.add(seqID.seqNo);
doc.add(seqID.seqNoDocValue);
doc.add(seqID.primaryTerm);
if (includePrimaryTerm) {
// primary terms are used to distinguish between parent and nested docs since 6.1.0
doc.add(seqID.primaryTerm);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
.anyMatch(indexType::equals)) {
if (context.getMapperService().hasNested()) {
// type filters are expected not to match nested docs
return Queries.newNonNestedFilter();
return Queries.newNonNestedFilter(context.indexVersionCreated());
} else {
return new MatchAllDocsQuery();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
Query innerQuery;
ObjectMapper objectMapper = context.nestedScope().getObjectMapper();
if (objectMapper == null) {
parentFilter = context.bitsetFilter(Queries.newNonNestedFilter());
parentFilter = context.bitsetFilter(Queries.newNonNestedFilter(context.indexVersionCreated()));
} else {
parentFilter = context.bitsetFilter(objectMapper.nestedTypeFilter());
}
Expand Down Expand Up @@ -377,7 +377,7 @@ public TopDocs[] topDocs(SearchHit[] hits) throws IOException {
SearchHit hit = hits[i];
Query rawParentFilter;
if (parentObjectMapper == null) {
rawParentFilter = Queries.newNonNestedFilter();
rawParentFilter = Queries.newNonNestedFilter(context.indexShard().indexSettings().getIndexVersionCreated());
} else {
rawParentFilter = parentObjectMapper.nestedTypeFilter();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ final class ShardSplittingQuery extends Query {
}
this.indexMetaData = indexMetaData;
this.shardId = shardId;
this.nestedParentBitSetProducer = hasNested ? newParentDocBitSetProducer() : null;
this.nestedParentBitSetProducer = hasNested ? newParentDocBitSetProducer(indexMetaData.getCreationVersion()) : null;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
Expand Down Expand Up @@ -336,9 +336,9 @@ public float matchCost() {
* than once. There is no point in using BitsetFilterCache#BitSetProducerWarmer since we use this only as a delete by query which is
* executed on a recovery-private index writer. There is no point in caching it and it won't have a cache hit either.
*/
private static BitSetProducer newParentDocBitSetProducer() {
private static BitSetProducer newParentDocBitSetProducer(Version indexVersionCreated) {
return context -> {
Query query = Queries.newNonNestedFilter();
Query query = Queries.newNonNestedFilter(indexVersionCreated);
final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
final IndexSearcher searcher = new IndexSearcher(topLevelContext);
searcher.setQueryCache(null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ public Query buildFilteredQuery(Query query) {
&& typeFilter == null // when a _type filter is set, it will automatically exclude nested docs
&& new NestedHelper(mapperService()).mightMatchNestedDocs(query)
&& (aliasFilter == null || new NestedHelper(mapperService()).mightMatchNestedDocs(aliasFilter))) {
filters.add(Queries.newNonNestedFilter());
filters.add(Queries.newNonNestedFilter(mapperService().getIndexSettings().getIndexVersionCreated()));
}

if (aliasFilter != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class NestedAggregator extends BucketsAggregator implements SingleBucketAggregat
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData,
boolean collectsFromSingleBucket) throws IOException {
super(name, factories, context, parentAggregator, pipelineAggregators, metaData);
Query parentFilter = parentObjectMapper != null ? parentObjectMapper.nestedTypeFilter() : Queries.newNonNestedFilter();

Query parentFilter = parentObjectMapper != null ? parentObjectMapper.nestedTypeFilter()
: Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
this.parentFilter = context.bitsetFilterCache().getBitSetProducer(parentFilter);
this.childFilter = childObjectMapper.nestedTypeFilter();
this.collectsFromSingleBucket = collectsFromSingleBucket;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public ReverseNestedAggregator(String name, AggregatorFactories factories, Objec
throws IOException {
super(name, factories, context, parent, pipelineAggregators, metaData);
if (objectMapper == null) {
parentFilter = Queries.newNonNestedFilter();
parentFilter = Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
} else {
parentFilter = objectMapper.nestedTypeFilter();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ public void execute(SearchContext context) {

private int findRootDocumentIfNested(SearchContext context, LeafReaderContext subReaderContext, int subDocId) throws IOException {
if (context.mapperService().hasNested()) {
BitSet bits = context.bitsetFilterCache().getBitSetProducer(Queries.newNonNestedFilter()).getBitSet(subReaderContext);
BitSet bits = context.bitsetFilterCache()
.getBitSetProducer(Queries.newNonNestedFilter(context.indexShard().indexSettings().getIndexVersionCreated()))
.getBitSet(subReaderContext);
if (!bits.get(subDocId)) {
return bits.nextSetBit(subDocId);
}
Expand Down Expand Up @@ -345,7 +347,7 @@ private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context
}
parentFilter = nestedParentObjectMapper.nestedTypeFilter();
} else {
parentFilter = Queries.newNonNestedFilter();
parentFilter = Queries.newNonNestedFilter(context.indexShard().indexSettings().getIndexVersionCreated());
}

Query childFilter = nestedObjectMapper.nestedTypeFilter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ private static Nested resolveNested(QueryShardContext context, NestedSortBuilder
Query parentQuery;
ObjectMapper objectMapper = context.nestedScope().getObjectMapper();
if (objectMapper == null) {
parentQuery = Queries.newNonNestedFilter();
parentQuery = Queries.newNonNestedFilter(context.indexVersionCreated());
} else {
parentQuery = objectMapper.nestedTypeFilter();
}
Expand Down
19 changes: 16 additions & 3 deletions core/src/test/java/org/apache/lucene/search/QueriesTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,28 @@

package org.apache.lucene.search;

import org.elasticsearch.Version;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.VersionUtils;

public class QueriesTests extends ESTestCase {

public void testNonNestedQuery() {
// This is a custom query that extends AutomatonQuery and want to make sure the equals method works
assertEquals(Queries.newNonNestedFilter(), Queries.newNonNestedFilter());
assertEquals(Queries.newNonNestedFilter().hashCode(), Queries.newNonNestedFilter().hashCode());
for (Version version : VersionUtils.allVersions()) {
// This is a custom query that extends AutomatonQuery and want to make sure the equals method works
assertEquals(Queries.newNonNestedFilter(version), Queries.newNonNestedFilter(version));
assertEquals(Queries.newNonNestedFilter(version).hashCode(), Queries.newNonNestedFilter(version).hashCode());
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
assertEquals(Queries.newNonNestedFilter(version), new DocValuesFieldExistsQuery(SeqNoFieldMapper.PRIMARY_TERM_NAME));
} else {
assertEquals(Queries.newNonNestedFilter(version), new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT)
.build());
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.test.VersionUtils;
import org.mockito.Mockito;

import java.io.IOException;
Expand All @@ -58,14 +59,16 @@ protected MappedFieldType createDefaultFieldType() {

public void testTermsQueryWhenTypesAreDisabled() throws Exception {
QueryShardContext context = Mockito.mock(QueryShardContext.class);
Version indexVersionCreated = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.CURRENT);
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_VERSION_CREATED, indexVersionCreated)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();
IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build();
IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY);
Mockito.when(context.getIndexSettings()).thenReturn(mockSettings);
Mockito.when(context.indexVersionCreated()).thenReturn(indexVersionCreated);

MapperService mapperService = Mockito.mock(MapperService.class);
Set<String> types = Collections.emptySet();
Expand All @@ -84,7 +87,7 @@ public void testTermsQueryWhenTypesAreDisabled() throws Exception {

Mockito.when(mapperService.hasNested()).thenReturn(true);
query = ft.termQuery("my_type", context);
assertEquals(Queries.newNonNestedFilter(), query);
assertEquals(Queries.newNonNestedFilter(context.indexVersionCreated()), query);

types = Collections.singleton("other_type");
Mockito.when(mapperService.types()).thenReturn(types);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.RoutingFieldMapper;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.TypeFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.test.ESTestCase;
Expand All @@ -51,6 +52,7 @@
public class ShardSplittingQueryTests extends ESTestCase {

public void testSplitOnID() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand All @@ -76,13 +78,15 @@ public void testSplitOnID() throws IOException {
}
docs.add(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
writer.addDocuments(docs);
} else {
writer.addDocument(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
}
}
Expand All @@ -95,6 +99,7 @@ public void testSplitOnID() throws IOException {
}

public void testSplitOnRouting() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand Down Expand Up @@ -122,14 +127,16 @@ public void testSplitOnRouting() throws IOException {
docs.add(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
writer.addDocuments(docs);
} else {
writer.addDocument(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
}
}
Expand All @@ -140,6 +147,7 @@ public void testSplitOnRouting() throws IOException {
}

public void testSplitOnIdOrRouting() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand All @@ -160,13 +168,15 @@ public void testSplitOnIdOrRouting() throws IOException {
rootDoc = Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
);
} else {
shardId = OperationRouting.generateShardId(metaData, Integer.toString(j), null);
rootDoc = Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
);
}

Expand Down Expand Up @@ -194,6 +204,7 @@ public void testSplitOnIdOrRouting() throws IOException {


public void testSplitOnRoutingPartitioned() throws IOException {
SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
Directory dir = newFSDirectory(createTempDir());
final int numDocs = randomIntBetween(50, 100);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Expand Down Expand Up @@ -223,14 +234,16 @@ public void testSplitOnRoutingPartitioned() throws IOException {
docs.add(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
writer.addDocuments(docs);
} else {
writer.addDocument(Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(Integer.toString(j)), Field.Store.YES),
new StringField(RoutingFieldMapper.NAME, routing, Field.Store.YES),
new SortedNumericDocValuesField("shard_id", shardId)
new SortedNumericDocValuesField("shard_id", shardId),
sequenceIDFields.primaryTerm
));
}
}
Expand Down
Loading

0 comments on commit 5a0b6d1

Please sign in to comment.