Skip to content

Commit

Permalink
Preserve the order of nested documents in the Lucene index (#34225)
Browse files Browse the repository at this point in the history
Today we reverse the initial order of the nested documents when we
index them in order to ensure that parent documents appear after
their children. This means that a query will always match nested documents
in the reverse order of their offsets in the source document.
Reversing all documents is not needed, so this change ensures that parent
documents appear after their children without modifying the initial order
in each nested level. This allows matching children in the order of their
appearance in the source document, which is a requirement to efficiently
implement #33587. Old indices created before this change will continue
to reverse the order of nested documents to ensure backward compatibility.
  • Loading branch information
jimczi committed Oct 3, 2018
1 parent 68cdb8b commit 20e7a64
Show file tree
Hide file tree
Showing 7 changed files with 172 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.all.AllEntries;
import org.elasticsearch.Version;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.IndexSettings;

Expand All @@ -34,6 +35,7 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

Expand Down Expand Up @@ -456,11 +458,38 @@ public Iterable<Document> nonRootDocuments() {
}

/**
 * Puts the parsed documents into their final order once parsing is done.
 * Nothing is changed when there is at most one document. Otherwise
 * {@code docsReversed} is set and the list is reordered in place so that
 * parents come after their children.
 */
void postParse() {
    if (documents.size() > 1) {
        docsReversed = true;
        if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_5_0)) {
            // For indices created on or after 6.5.0 we preserve the order of the
            // children while ensuring that parents appear after them. The list must
            // NOT be reversed beforehand: reorderParent expects the original order.
            List<Document> newDocs = reorderParent(documents);
            documents.clear();
            documents.addAll(newDocs);
        } else {
            // Older indices: reverse the order of docs for nested docs support,
            // parent should be last. Done exactly once to keep the legacy layout.
            Collections.reverse(documents);
        }
    }
}

/**
 * Returns a copy of the provided {@link List} where parent documents appear
 * after their children. Siblings keep the relative order they had in
 * {@code docs}; the input list itself is not modified.
 *
 * @param docs the documents to reorder; each document is expected to come
 *             after the document returned by its {@code getParent()}
 * @return a new list containing the same documents, children before parents
 * @throws IllegalStateException if a document's parent has not been seen
 *                               before the document itself
 */
private List<Document> reorderParent(List<Document> docs) {
    List<Document> newDocs = new ArrayList<>(docs.size());
    // Stack of documents that may still receive children; the head is the
    // most recently opened one.
    LinkedList<Document> parents = new LinkedList<>();
    for (Document doc : docs) {
        // Emit every open document that is not the parent of the current one:
        // all of its children have been seen, so it can now be written out.
        while (parents.peek() != doc.getParent()) {
            if (parents.isEmpty()) {
                // Without this guard the loop would append nulls forever.
                throw new IllegalStateException("nested document appears before its parent document");
            }
            newDocs.add(parents.pop());
        }
        parents.push(doc);
    }
    // Remaining open documents go out innermost first, so the outermost one is last.
    newDocs.addAll(parents);
    return newDocs;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.document.DocumentField;
Expand All @@ -35,6 +36,7 @@
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.mapper.DocumentMapper;
Expand Down Expand Up @@ -344,6 +346,7 @@ private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context
ObjectMapper current = nestedObjectMapper;
String originalName = nestedObjectMapper.name();
SearchHit.NestedIdentity nestedIdentity = null;
final IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
do {
Query parentFilter;
nestedParentObjectMapper = current.getParentObjectMapper(mapperService);
Expand Down Expand Up @@ -373,12 +376,32 @@ private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context
BitSet parentBits = context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext);

int offset = 0;
int nextParent = parentBits.nextSetBit(currentParent);
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
docId = childIter.nextDoc()) {
offset++;
if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_5_0)) {
/**
* Starts from the previous parent and finds the offset of the
* <code>nestedSubDocID</code> within the nested children. Nested documents
* are indexed in the same order than in the source array so the offset
* of the nested child is the number of nested document with the same parent
* that appear before him.
*/
int previousParent = parentBits.prevSetBit(currentParent);
for (int docId = childIter.advance(previousParent + 1); docId < nestedSubDocId && docId != DocIdSetIterator.NO_MORE_DOCS;
docId = childIter.nextDoc()) {
offset++;
}
currentParent = nestedSubDocId;
} else {
/**
* Nested documents are in reverse order in this version so we start from the current nested document
* and find the number of documents with the same parent that appear after it.
*/
int nextParent = parentBits.nextSetBit(currentParent);
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
docId = childIter.nextDoc()) {
offset++;
}
currentParent = nextParent;
}
currentParent = nextParent;
current = nestedObjectMapper = nestedParentObjectMapper;
int currentPrefix = current == null ? 0 : current.name().length() + 1;
nestedIdentity = new SearchHit.NestedIdentity(originalName.substring(currentPrefix), offset, nestedIdentity);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,28 +389,28 @@ public void testCopyToNestedField() throws Exception {
assertEquals(6, doc.docs().size());

Document nested = doc.docs().get(0);
assertFieldValue(nested, "n1.n2.target", 7L);
assertFieldValue(nested, "n1.n2.target", 3L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

nested = doc.docs().get(2);
nested = doc.docs().get(1);
assertFieldValue(nested, "n1.n2.target", 5L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

nested = doc.docs().get(3);
assertFieldValue(nested, "n1.n2.target", 3L);
assertFieldValue(nested, "n1.n2.target", 7L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

Document parent = doc.docs().get(1);
Document parent = doc.docs().get(2);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 7L);
assertFieldValue(parent, "n1.target", 3L, 5L);
assertFieldValue(parent, "n1.n2.target");

parent = doc.docs().get(4);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 3L, 5L);
assertFieldValue(parent, "n1.target", 7L);
assertFieldValue(parent, "n1.n2.target");

Document root = doc.docs().get(5);
Expand Down
Loading

0 comments on commit 20e7a64

Please sign in to comment.