Skip to content

Commit

Permalink
Revert "Avoid global ordinals in composite aggregation (elastic#74559)"
Browse files Browse the repository at this point in the history
This reverts commit 5cfcb2f.

 Conflicts:
	server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
	server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/OrdinalValuesSource.java
	server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/TermsValuesSourceBuilder.java
	server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/SingleDimensionValuesSourceTests.java
  • Loading branch information
not-napoleon committed Oct 7, 2021
1 parent 29b80e3 commit 167ed48
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 431 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
import org.elasticsearch.search.aggregations.LeafBucketCollector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.core.Types.forciblyCast;
Expand Down Expand Up @@ -60,7 +58,6 @@ public int hashCode() {

private LongArray docCounts;
private boolean afterKeyIsSet = false;
private int leafReaderOrd = -1; // current LeafReaderContext ordinal

/**
* Constructs a composite queue with the specified size and sources.
Expand Down Expand Up @@ -237,26 +234,14 @@ LeafBucketCollector getLeafCollector(Comparable<?> forceLeadSourceValue, LeafRea
throws IOException {
int last = arrays.length - 1;
LeafBucketCollector collector = in;
boolean requiresRehashingWhenSwitchingLeafReaders = false;
while (last > 0) {
SingleDimensionValuesSource<?> valuesSource = arrays[last--];
requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
collector = valuesSource.getLeafCollector(context, collector);
collector = arrays[last--].getLeafCollector(context, collector);
}
SingleDimensionValuesSource<?> valuesSource = arrays[last];
requiresRehashingWhenSwitchingLeafReaders |= valuesSource.requiresRehashingWhenSwitchingLeafReaders();
if (forceLeadSourceValue != null) {
collector = valuesSource.getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
collector = arrays[last].getLeafCollector(forciblyCast(forceLeadSourceValue), context, collector);
} else {
collector = valuesSource.getLeafCollector(context, collector);
collector = arrays[last].getLeafCollector(context, collector);
}
boolean switchedLeafReaders = context.ord != leafReaderOrd;
if (map.isEmpty() == false && requiresRehashingWhenSwitchingLeafReaders && switchedLeafReaders) {
List<Map.Entry<Slot, Integer>> entries = new ArrayList<>(map.entrySet());
map.clear();
entries.forEach(e -> map.put(e.getKey(), e.getValue()));
}
leafReaderOrd = context.ord;
return collector;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.composite;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.LeafBucketCollector;

import java.io.IOException;

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

/**
 * A {@link SingleDimensionValuesSource} for global ordinals.
 *
 * <p>Each slot stores the ordinal of a term instead of the term itself; the
 * term bytes are only resolved on demand in {@link #toComparable(int)}.
 * Documents without a value are recorded as {@link #MISSING_VALUE_FLAG} when
 * {@code missingBucket} is enabled.
 */
class GlobalOrdinalValuesSource extends SingleDimensionValuesSource<BytesRef> {
    /** Sentinel ordinal stored for documents that have no value for the field. */
    public static final long MISSING_VALUE_FLAG = -1L;

    /** Supplies the doc values to read for a given leaf. */
    private final CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc;
    /** Ordinal held by each slot; replaced by a larger array when {@link #copyCurrent(int)} needs more room. */
    private LongArray values;
    /** Doc values used to translate ordinals to terms and back; set from the first leaf that is visited. */
    private SortedSetDocValues lookup;
    /** Ordinal (or {@link #MISSING_VALUE_FLAG}) of the value currently being collected. */
    private long currentValue;
    /** Ordinal of the "after" value; stays {@code null} until it has been resolved. */
    private Long afterValueGlobalOrd;
    /** True when the "after" term was not found and its ordinal is an insertion point instead. */
    private boolean isTopValueInsertionPoint;

    /** One-entry cache for {@link #toComparable(int)}: the last ordinal resolved and its term. */
    private long lastLookupOrd = -1;
    private BytesRef lastLookupValue;

    /**
     * Creates the source and allocates the backing ordinal array.
     *
     * @param bigArrays     allocator for the ordinal array
     * @param type          field type, forwarded to the parent class
     * @param docValuesFunc supplies the {@link SortedSetDocValues} for a leaf
     * @param format        format used to parse the "after" value
     * @param missingBucket whether documents without a value are collected
     * @param missingOrder  ordering applied when one side of a comparison is missing
     * @param size          requested number of slots; the initial allocation is capped at 100
     * @param reverseMul    multiplier applied to comparison results
     */
    GlobalOrdinalValuesSource(
        BigArrays bigArrays,
        MappedFieldType type,
        CheckedFunction<LeafReaderContext, SortedSetDocValues, IOException> docValuesFunc,
        DocValueFormat format,
        boolean missingBucket,
        MissingOrder missingOrder,
        int size,
        int reverseMul
    ) {
        super(bigArrays, format, type, missingBucket, missingOrder, size, reverseMul);
        this.docValuesFunc = docValuesFunc;
        final int initialCapacity = Math.min(size, 100);
        this.values = bigArrays.newLongArray(initialCapacity, false);
    }

    /** Stores the current ordinal in {@code slot}, growing the array first when necessary. */
    @Override
    void copyCurrent(int slot) {
        values = bigArrays.grow(values, slot + 1);
        values.set(slot, currentValue);
    }

    /**
     * Orders two ordinals. When either side is {@link #MISSING_VALUE_FLAG} the
     * multiplier comes from {@code missingOrder}; otherwise {@code reverseMul} is used.
     */
    private int compareInternal(long lhs, long rhs) {
        final int multiplier;
        if (lhs == MISSING_VALUE_FLAG || rhs == MISSING_VALUE_FLAG) {
            multiplier = missingOrder.compareAnyValueToMissing(reverseMul);
        } else {
            multiplier = reverseMul;
        }
        return multiplier * Long.compare(lhs, rhs);
    }

    /** Compares the ordinals stored in two slots. */
    @Override
    int compare(int from, int to) {
        final long left = values.get(from);
        final long right = values.get(to);
        return compareInternal(left, right);
    }

    /** Compares the current ordinal with the one stored in {@code slot}. */
    @Override
    int compareCurrent(int slot) {
        final long stored = values.get(slot);
        return compareInternal(currentValue, stored);
    }

    /** Compares the current ordinal with the resolved ordinal of the "after" value. */
    @Override
    int compareCurrentWithAfter() {
        final int order = compareInternal(currentValue, afterValueGlobalOrd);
        if (order != 0 || isTopValueInsertionPoint == false) {
            return order;
        }
        // The "after" term does not exist in this shard, so the comparison was made
        // against its insertion point: a tie means the current value sorts after it.
        return missingOrder.compareAnyValueToMissing(reverseMul);
    }

    /** Hash of the ordinal stored in {@code slot}. */
    @Override
    int hashCode(int slot) {
        final long stored = values.get(slot);
        return Long.hashCode(stored);
    }

    /** Hash of the current ordinal. */
    @Override
    int hashCodeCurrent() {
        return Long.hashCode(currentValue);
    }

    /**
     * Records the value to resume after.
     *
     * @param value the "after" value; {@code null} is accepted only when {@code missingBucket} is set
     * @throws IllegalArgumentException if the value is not a string and the non-string exception below does not apply
     */
    @Override
    void setAfter(Comparable<?> value) {
        if (missingBucket && value == null) {
            afterValue = null;
            afterValueGlobalOrd = MISSING_VALUE_FLAG;
            return;
        }
        // A non-string value is tolerated when the field is missing in this shard: it may
        // exist in other shards with a type that is not a string.
        final boolean accepted = value.getClass() == String.class || (missingBucket && fieldType == null);
        if (accepted == false) {
            throw new IllegalArgumentException("invalid value, expected string, got " + value.getClass().getSimpleName());
        }
        afterValue = format.parseBytesRef(value.toString());
    }

    /**
     * Resolves the ordinal stored in {@code slot} to its term.
     *
     * @return the term, or {@code null} for the missing bucket; the result of the
     *         previous lookup is reused when the same ordinal is requested again
     */
    @Override
    BytesRef toComparable(int slot) throws IOException {
        final long globalOrd = values.get(slot);
        if (missingBucket && globalOrd == MISSING_VALUE_FLAG) {
            return null;
        }
        if (globalOrd != lastLookupOrd) {
            lastLookupOrd = globalOrd;
            // Copied because the returned term is cached in a field and handed out again later.
            lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(globalOrd));
        }
        return lastLookupValue;
    }

    /**
     * Builds a collector that forwards every ordinal of a document to {@code next},
     * or forwards {@link #MISSING_VALUE_FLAG} once when the document has no value
     * and {@code missingBucket} is set.
     */
    @Override
    LeafBucketCollector getLeafCollector(LeafReaderContext context, LeafBucketCollector next) throws IOException {
        final SortedSetDocValues dvs = docValuesFunc.apply(context);
        if (lookup == null) {
            initLookup(dvs);
        }
        return new LeafBucketCollector() {
            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (dvs.advanceExact(doc) == false) {
                    if (missingBucket) {
                        currentValue = MISSING_VALUE_FLAG;
                        next.collect(doc, bucket);
                    }
                    return;
                }
                for (long ord = dvs.nextOrd(); ord != NO_MORE_ORDS; ord = dvs.nextOrd()) {
                    currentValue = ord;
                    next.collect(doc, bucket);
                }
            }
        };
    }

    /**
     * Builds a collector pinned to a single term: the ordinal of {@code value} is
     * searched for among the document's ordinals until it has been found once, and
     * every document is then forwarded to {@code next}.
     *
     * @throws IllegalArgumentException if {@code value} is not a {@link BytesRef}
     */
    @Override
    LeafBucketCollector getLeafCollector(Comparable<BytesRef> value, LeafReaderContext context, LeafBucketCollector next)
        throws IOException {
        if (value.getClass() != BytesRef.class) {
            throw new IllegalArgumentException("Expected BytesRef, got " + value.getClass());
        }
        final BytesRef term = (BytesRef) value;
        final SortedSetDocValues dvs = docValuesFunc.apply(context);
        if (lookup == null) {
            initLookup(dvs);
        }
        return new LeafBucketCollector() {
            boolean currentValueIsSet = false;

            @Override
            public void collect(int doc, long bucket) throws IOException {
                // Once the ordinal is known it is reused for all later documents of this collector.
                if (currentValueIsSet == false && dvs.advanceExact(doc)) {
                    for (long ord = dvs.nextOrd(); ord != NO_MORE_ORDS; ord = dvs.nextOrd()) {
                        if (term.equals(lookup.lookupOrd(ord))) {
                            currentValue = ord;
                            currentValueIsSet = true;
                            break;
                        }
                    }
                }
                assert currentValueIsSet;
                next.collect(doc, bucket);
            }
        };
    }

    /**
     * Returns a {@link TermsSortedDocsProducer} for the field, or {@code null} when
     * sorted docs are not applicable, the field is not a {@link StringFieldType},
     * or the query is neither {@code null} nor a {@link MatchAllDocsQuery}.
     */
    @Override
    SortedDocsProducer createSortedDocsProducerOrNull(IndexReader reader, Query query) {
        if (checkIfSortedDocsIsApplicable(reader, fieldType) == false) {
            return null;
        }
        if ((fieldType instanceof StringFieldType) == false) {
            return null;
        }
        if (query != null && query.getClass() != MatchAllDocsQuery.class) {
            return null;
        }
        return new TermsSortedDocsProducer(fieldType.name());
    }

    /** Releases the ordinal array. */
    @Override
    public void close() {
        Releasables.close(values);
    }

    /**
     * Remembers {@code dvs} for ordinal lookups and, if an "after" term is set but
     * not yet resolved, resolves it to an ordinal.
     */
    private void initLookup(SortedSetDocValues dvs) throws IOException {
        lookup = dvs;
        if (afterValue == null || afterValueGlobalOrd != null) {
            return;
        }
        long ord = lookup.lookupTerm(afterValue);
        if (ord < 0) {
            // convert negative insert position
            ord = -ord - 1;
            isTopValueInsertionPoint = true;
        }
        afterValueGlobalOrd = ord;
    }
}
Loading

0 comments on commit 167ed48

Please sign in to comment.