Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert 74559 (Avoid global ordinals in composite) #78846

Merged
merged 3 commits
Oct 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/reference/mapping/params/eager-global-ordinals.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ ordinal for each segment.
Global ordinals are used if a search contains any of the following components:

* Certain bucket aggregations on `keyword`, `ip`, and `flattened` fields. This
includes `terms` aggregations as mentioned above, as well as
`diversified_sampler` and `significant_terms`.
includes `terms` aggregations as mentioned above, as well as `composite`,
`diversified_sampler`, and `significant_terms`.
* Bucket aggregations on `text` fields that require <<fielddata, `fielddata`>>
to be enabled.
* Operations on parent and child documents from a `join` field, including
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
import org.elasticsearch.search.aggregations.LeafBucketCollector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.core.Types.forciblyCast;
Expand Down Expand Up @@ -60,7 +58,6 @@ public int hashCode() {

private LongArray docCounts;
private boolean afterKeyIsSet = false;
private int leafReaderOrd = -1; // current LeafReaderContext ordinal

/**
* Constructs a composite queue with the specified size and sources.
Expand Down Expand Up @@ -237,26 +234,14 @@ LeafBucketCollector getLeafCollector(Comparable<?> forceLeadSourceValue, LeafRea
throws IOException {
int last = arrays.length - 1;
LeafBucketCollector collector = in;
boolean requiresRehashingWhenSwitchingLeafReaders = false;
while (last > 0) {
SingleDimensionValuesSource<?> valuesSource = arrays[last--];
requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
collector = valuesSource.getLeafCollector(context, collector);
collector = arrays[last--].getLeafCollector(context, collector);
}
SingleDimensionValuesSource<?> valuesSource = arrays[last];
requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
if (forceLeadSourceValue != null) {
collector = valuesSource.getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
collector = arrays[last].getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
} else {
collector = valuesSource.getLeafCollector(context, collector);
collector = arrays[last].getLeafCollector(context, collector);
}
boolean switchedLeafReaders = context.ord != leafReaderOrd;
if (map.isEmpty() == false && requiresRehashingWhenSwitchingLeafReaders && switchedLeafReaders) {
List<Map.Entry<Slot, Integer>> entries = new ArrayList<>(map.entrySet());
map.clear();
entries.forEach(e -> map.put(e.getKey(), e.getValue()));
}
leafReaderOrd = context.ord;
return collector;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.composite;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.LeafBucketCollector;

import java.io.IOException;

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

/**
 * A {@link SingleDimensionValuesSource} for global ordinals.
 *
 * <p>Keyword-like values are represented by their segment-independent (global)
 * ordinal, a {@code long}, so per-slot storage and comparisons are cheap integer
 * operations; the ordinal is only resolved back to a {@link BytesRef} term when
 * {@link #toComparable(int)} is called.
 *
 * <p>Not thread-safe: {@code currentValue}, {@code lookup} and the lookup memo
 * fields are mutated during collection.
 */
class GlobalOrdinalValuesSource extends SingleDimensionValuesSource<BytesRef> {
    /** Sentinel ordinal stored for documents with no value when {@code missingBucket} is enabled. */
    public static final long MISSING_VALUE_FLAG = -1L;
    /** Supplies the (global) sorted-set doc values for a given leaf. */
    private final CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc;
    /** Per-slot storage: slot index -> global ordinal (or {@link #MISSING_VALUE_FLAG}). */
    private LongArray values;
    /** Doc values captured on first use; used to resolve ordinals back to terms.
     *  NOTE(review): assumes ordinals are global, i.e. consistent across leaves — implied by the class name, confirm via docValuesFunc. */
    private SortedSetDocValues lookup;
    /** Global ordinal of the value for the document currently being collected. */
    private long currentValue;
    /** Global ordinal (or insertion point) of the "after" key; null until resolved by {@link #initLookup}. */
    private Long afterValueGlobalOrd;
    /** True when {@code afterValueGlobalOrd} is an insertion point because the "after" term is absent from this shard. */
    private boolean isTopValueInsertionPoint;

    // Memoization of the last ordinal -> term lookup, since toComparable is
    // frequently called repeatedly for the same slot value.
    private long lastLookupOrd = -1;
    private BytesRef lastLookupValue;

    GlobalOrdinalValuesSource(
        BigArrays bigArrays,
        MappedFieldType type,
        CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc,
        DocValueFormat format,
        boolean missingBucket,
        MissingOrder missingOrder,
        int size,
        int reverseMul
    ) {
        super(bigArrays, format, type, missingBucket, missingOrder, size, reverseMul);
        this.docValuesFunc = docValuesFunc;
        // Start small (at most 100 slots) and grow on demand in copyCurrent.
        this.values = bigArrays.newLongArray(Math.min(size, 100), false);
    }

    @Override
    void copyCurrent(int slot) {
        // Record the current document's ordinal into the given slot, growing storage if needed.
        values = bigArrays.grow(values, slot + 1);
        values.set(slot, currentValue);
    }

    /**
     * Compares two global ordinals, honouring the configured order for the
     * missing bucket: when either side is {@link #MISSING_VALUE_FLAG} the sign
     * comes from {@code missingOrder} instead of plain {@code reverseMul}.
     */
    private int compareInternal(long lhs, long rhs) {
        int mul = (lhs == MISSING_VALUE_FLAG || rhs == MISSING_VALUE_FLAG) ? missingOrder.compareAnyValueToMissing(reverseMul) : reverseMul;
        return Long.compare(lhs, rhs) * mul;
    }

    @Override
    int compare(int from, int to) {
        return compareInternal(values.get(from), values.get(to));
    }

    @Override
    int compareCurrent(int slot) {
        return compareInternal(currentValue, values.get(slot));
    }

    @Override
    int compareCurrentWithAfter() {
        int cmp = compareInternal(currentValue, afterValueGlobalOrd);
        if (cmp == 0 && isTopValueInsertionPoint) {
            // the top value is missing in this shard, the comparison is against
            // the insertion point of the top value so equality means that the value
            // is "after" the insertion point.
            return missingOrder.compareAnyValueToMissing(reverseMul);
        }
        return cmp;
    }

    @Override
    int hashCode(int slot) {
        return Long.hashCode(values.get(slot));
    }

    @Override
    int hashCodeCurrent() {
        return Long.hashCode(currentValue);
    }

    @Override
    void setAfter(Comparable<?> value) {
        if (missingBucket && value == null) {
            afterValue = null;
            afterValueGlobalOrd = MISSING_VALUE_FLAG;
        } else if (value.getClass() == String.class || (missingBucket && fieldType == null)) {
            // the value might be not string if this field is missing in this shard but present in other shards
            // and doesn't have a string type
            // The global ordinal for afterValue is resolved lazily in initLookup,
            // once doc values are available.
            afterValue = format.parseBytesRef(value.toString());
        } else {
            throw new IllegalArgumentException("invalid value, expected string, got " + value.getClass().getSimpleName());
        }
    }

    @Override
    BytesRef toComparable(int slot) throws IOException {
        long globalOrd = values.get(slot);
        if (missingBucket && globalOrd == MISSING_VALUE_FLAG) {
            return null;
        } else if (globalOrd == lastLookupOrd) {
            // Reuse the memoized term from the previous lookup.
            return lastLookupValue;
        } else {
            lastLookupOrd = globalOrd;
            // deepCopyOf: lookupOrd may return a reused/shared BytesRef.
            lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(values.get(slot)));
            return lastLookupValue;
        }
    }

    @Override
    LeafBucketCollector getLeafCollector(LeafReaderContext context, LeafBucketCollector next) throws IOException {
        final SortedSetDocValues dvs = docValuesFunc.apply(context);
        if (lookup == null) {
            initLookup(dvs);
        }
        return new LeafBucketCollector() {
            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (dvs.advanceExact(doc)) {
                    long ord;
                    // Multi-valued field: emit the downstream collector once per ordinal.
                    while ((ord = dvs.nextOrd()) != NO_MORE_ORDS) {
                        currentValue = ord;
                        next.collect(doc, bucket);
                    }
                } else if (missingBucket) {
                    currentValue = MISSING_VALUE_FLAG;
                    next.collect(doc, bucket);
                }
            }
        };
    }

    /**
     * Collector variant that only collects documents containing the forced lead
     * source value {@code value}; used when a {@link SortedDocsProducer} drives
     * collection term by term.
     */
    @Override
    LeafBucketCollector getLeafCollector(Comparable<BytesRef> value, LeafReaderContext context, LeafBucketCollector next)
        throws IOException {
        if (value.getClass() != BytesRef.class) {
            throw new IllegalArgumentException("Expected BytesRef, got " + value.getClass());
        }
        BytesRef term = (BytesRef) value;
        final SortedSetDocValues dvs = docValuesFunc.apply(context);
        if (lookup == null) {
            initLookup(dvs);
        }
        return new LeafBucketCollector() {
            // Resolve the term's ordinal lazily on the first collected document,
            // then reuse it for the rest of the leaf.
            boolean currentValueIsSet = false;

            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (currentValueIsSet == false) {
                    if (dvs.advanceExact(doc)) {
                        long ord;
                        while ((ord = dvs.nextOrd()) != NO_MORE_ORDS) {
                            if (term.equals(lookup.lookupOrd(ord))) {
                                currentValueIsSet = true;
                                currentValue = ord;
                                break;
                            }
                        }
                    }
                }
                // Every collected doc is expected to contain the forced term.
                assert currentValueIsSet;
                next.collect(doc, bucket);
            }
        };
    }

    @Override
    SortedDocsProducer createSortedDocsProducerOrNull(IndexReader reader, Query query) {
        // Early termination via terms enumeration is only possible on string
        // fields, with no query (or match_all) restricting the doc set.
        if (checkIfSortedDocsIsApplicable(reader, fieldType) == false
            || fieldType instanceof StringFieldType == false
            || (query != null && query.getClass() != MatchAllDocsQuery.class)) {
            return null;
        }
        return new TermsSortedDocsProducer(fieldType.name());
    }

    @Override
    public void close() {
        Releasables.close(values);
    }

    /**
     * Captures the doc values used for ordinal-to-term resolution and, on first
     * call, resolves the "after" term to its global ordinal. A negative result
     * from {@code lookupTerm} means the term is absent from this shard; the
     * encoded insertion point is kept and flagged via
     * {@code isTopValueInsertionPoint} so comparisons can break ties correctly.
     */
    private void initLookup(SortedSetDocValues dvs) throws IOException {
        lookup = dvs;
        if (afterValue != null && afterValueGlobalOrd == null) {
            afterValueGlobalOrd = lookup.lookupTerm(afterValue);
            if (afterValueGlobalOrd < 0) {
                // convert negative insert position
                afterValueGlobalOrd = -afterValueGlobalOrd - 1;
                isTopValueInsertionPoint = true;
            }
        }
    }
}
Loading