Skip to content

Commit

Permalink
Calculate precise cardinality upper bounds (elastic#61529) (elastic#6…
Browse files Browse the repository at this point in the history
…1754)

This reworks `CardinalityUpperBound` to support precise estimates while
maintaining most of the public API. This will allow us to make more
informed choices about the data structures that we use in aggregations.
None of those interesting choices come as part of this change, but they
are more possible with it.
  • Loading branch information
nik9000 authored Aug 31, 2020
1 parent f39a9bb commit fb84c1f
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,29 @@
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregator;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;

import java.util.function.IntFunction;

/**
* Upper bound of how many {@code owningBucketOrds} that an {@link Aggregator}
* will have to collect into. Just "none", "one", and "many".
* will have to collect into.
*/
public enum CardinalityUpperBound {
public abstract class CardinalityUpperBound {
/**
* {@link Aggregator}s with this cardinality won't collect any data at
* all. For the most part this happens when an aggregation is inside of a
* {@link BucketsAggregator} that is pointing to an unmapped field.
*/
NONE {
public static final CardinalityUpperBound NONE = new CardinalityUpperBound() {
@Override
public CardinalityUpperBound multiply(int bucketCount) {
return NONE;
}
},

@Override
public <R> R map(IntFunction<R> mapper) {
return mapper.apply(0);
}
};

/**
* {@link Aggregator}s with this cardinality will be collected
Expand All @@ -47,35 +54,32 @@ public CardinalityUpperBound multiply(int bucketCount) {
* aggregations like {@link FilterAggregator} or a {@link RangeAggregator}
* configured to collect only a single range.
*/
ONE {
@Override
public CardinalityUpperBound multiply(int bucketCount) {
switch (bucketCount) {
case 0:
return NONE;
case 1:
return ONE;
default:
return MANY;
}
}
},
public static final CardinalityUpperBound ONE = new KnownCardinalityUpperBound(1);

/**
* {@link Aggregator}s with this cardinality may be collected many times.
* Most sub-aggregators of {@link BucketsAggregator}s will have
* this cardinality.
*/
MANY {
public static final CardinalityUpperBound MANY = new CardinalityUpperBound() {
    /**
     * Zero buckets per owning ordinal collapses the bound to {@code NONE};
     * every other bucket count leaves it at {@code MANY}.
     */
    @Override
    public CardinalityUpperBound multiply(int bucketCount) {
        return bucketCount == 0 ? NONE : MANY;
    }

    /**
     * Applies the mapper to {@code Integer.MAX_VALUE}, the value used to
     * signal that no precise estimate is available.
     */
    @Override
    public <R> R map(IntFunction<R> mapper) {
        final int unknownEstimate = Integer.MAX_VALUE;
        return mapper.apply(unknownEstimate);
    }
};

private CardinalityUpperBound() {
// Sealed class: because this constructor is private, only classes declared
// inside CardinalityUpperBound itself (the anonymous NONE and MANY bodies
// and the nested KnownCardinalityUpperBound) can extend it.
}

/**
* Get the rough measure of the number of buckets a fixed-bucket
* {@link Aggregator} will collect.
Expand All @@ -84,4 +88,46 @@ public CardinalityUpperBound multiply(int bucketCount) {
* will collect per owning ordinal
*/
public abstract CardinalityUpperBound multiply(int bucketCount);

/**
* Map the cardinality to a value. The argument to the {@code mapper}
* is the estimated cardinality, or {@code Integer.MAX_VALUE} if the
* cardinality is unknown.
*
* @param mapper function applied to the estimate; it receives {@code 0}
*        for {@link #NONE} and {@code Integer.MAX_VALUE} for {@link #MANY}
* @param <R> type of the value produced by {@code mapper}
* @return whatever {@code mapper} returns for the estimate
*/
public abstract <R> R map(IntFunction<R> mapper);

/**
* Cardinality estimate with a known upper bound.
*/
private static class KnownCardinalityUpperBound extends CardinalityUpperBound {
    /** Upper bound on the number of owning bucket ordinals that will be collected into. */
    private final int estimate;

    /**
     * @param estimate the known upper bound; stored as-is without validation
     */
    KnownCardinalityUpperBound(int estimate) {
        this.estimate = estimate;
    }

    /**
     * Scale this estimate by the number of buckets collected per owning ordinal.
     *
     * @param bucketCount number of buckets per owning ordinal, must not be negative
     * @return {@code NONE} when {@code bucketCount} is zero, this instance when it
     *         is one, {@code MANY} when the product reaches {@code Integer.MAX_VALUE},
     *         and otherwise a new bound holding the exact product
     * @throws IllegalArgumentException if {@code bucketCount} is negative
     */
    @Override
    public CardinalityUpperBound multiply(int bucketCount) {
        if (bucketCount < 0) {
            // Zero is a legal input (it yields NONE), so the requirement is
            // "not negative" rather than "positive".
            throw new IllegalArgumentException("bucketCount must not be negative but was [" + bucketCount + "]");
        }
        switch (bucketCount) {
            case 0:
                return NONE;
            case 1:
                // Multiplying by one changes nothing, so avoid allocating.
                return this;
            default:
                // Widen to long so the product cannot overflow before it is checked.
                long newEstimate = (long) estimate * (long) bucketCount;
                if (newEstimate >= Integer.MAX_VALUE) {
                    // Integer.MAX_VALUE itself is what MANY reports, so treat reaching it as unknown.
                    return MANY;
                }
                return new KnownCardinalityUpperBound((int) newEstimate);
        }
    }

    /**
     * Applies the mapper to the stored estimate.
     */
    @Override
    public <R> R map(IntFunction<R> mapper) {
        return mapper.apply(estimate);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public class NestedAggregator extends BucketsAggregator implements SingleBucketA
: Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
this.parentFilter = context.bitsetFilterCache().getBitSetProducer(parentFilter);
this.childFilter = childObjectMapper.nestedTypeFilter();
this.collectsFromSingleBucket = cardinality != CardinalityUpperBound.MANY;
this.collectsFromSingleBucket = cardinality.map(estimate -> estimate < 2);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public abstract class BytesKeyedBucketOrds implements Releasable {
* Build a {@link BytesKeyedBucketOrds}.
*/
public static BytesKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
return cardinality == CardinalityUpperBound.ONE ? new FromSingle(bigArrays) : new FromMany(bigArrays);
return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
}

private BytesKeyedBucketOrds() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,12 @@ public GlobalOrdinalsStringTermsAggregator(
if (remapGlobalOrds) {
this.collectionStrategy = new RemapGlobalOrds(cardinality);
} else {
if (cardinality == CardinalityUpperBound.MANY) {
throw new AggregationExecutionException("Dense ords don't know how to collect from many buckets");
}
this.collectionStrategy = new DenseGlobalOrds();
this.collectionStrategy = cardinality.map(estimate -> {
if (estimate > 1) {
throw new AggregationExecutionException("Dense ords don't know how to collect from many buckets");
}
return new DenseGlobalOrds();
});
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ public abstract class LongKeyedBucketOrds implements Releasable {
* Build a {@link LongKeyedBucketOrds}.
*/
public static LongKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
// TODO nothing NONE?
return cardinality != CardinalityUpperBound.MANY ? new FromSingle(bigArrays) : new FromMany(bigArrays);
return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
}

private LongKeyedBucketOrds() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,47 @@
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.sameInstance;

public class CardinalityUpperBoundTests extends ESTestCase {
public void testNoneMultiply() {
assertThat(CardinalityUpperBound.NONE.multiply(randomInt()), equalTo(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.NONE.multiply(randomInt()), sameInstance(CardinalityUpperBound.NONE));
}

public void testNoneMap() {
    // NONE hands an estimate of zero to the mapper.
    int mapped = CardinalityUpperBound.NONE.map(i -> i);
    assertThat(mapped, equalTo(0));
}

public void testOneMultiply() {
assertThat(CardinalityUpperBound.ONE.multiply(0), equalTo(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.ONE.multiply(1), equalTo(CardinalityUpperBound.ONE));
assertThat(CardinalityUpperBound.ONE.multiply(between(2, Integer.MAX_VALUE)), equalTo(CardinalityUpperBound.MANY));
assertThat(CardinalityUpperBound.ONE.multiply(0), sameInstance(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.ONE.multiply(1), sameInstance(CardinalityUpperBound.ONE));
assertThat(CardinalityUpperBound.ONE.multiply(Integer.MAX_VALUE), sameInstance(CardinalityUpperBound.MANY));
}

public void testOneMap() {
    // ONE hands an estimate of exactly one to the mapper.
    int mapped = CardinalityUpperBound.ONE.map(i -> i);
    assertThat(mapped, equalTo(1));
}

public void testLargerKnownValues() {
    // Scale ONE up to a random known estimate and check it is reported unchanged.
    int knownEstimate = between(2, Short.MAX_VALUE);
    CardinalityUpperBound scaled = CardinalityUpperBound.ONE.multiply(knownEstimate);
    int reported = scaled.map(i -> i);
    assertThat(reported, equalTo(knownEstimate));

    // Multiplying by zero yields NONE; multiplying by one yields the same instance.
    assertThat(scaled.multiply(0), sameInstance(CardinalityUpperBound.NONE));
    assertThat(scaled.multiply(1), sameInstance(scaled));

    // Any factor at or above ceil(MAX_VALUE / estimate) pushes the product to
    // Integer.MAX_VALUE or beyond, which must degrade to MANY.
    int overflowFloor = (int) Math.ceil((double) Integer.MAX_VALUE / knownEstimate);
    int overflowingFactor = between(overflowFloor, Integer.MAX_VALUE);
    assertThat(scaled.multiply(overflowingFactor), sameInstance(CardinalityUpperBound.MANY));

    // Both operands are at most Short.MAX_VALUE, so the product stays below
    // Integer.MAX_VALUE and must be tracked exactly.
    int factor = between(2, Short.MAX_VALUE - 1);
    int product = scaled.multiply(factor).map(i -> i);
    assertThat(product, equalTo(knownEstimate * factor));
}

public void testManyMultiply() {
assertThat(CardinalityUpperBound.MANY.multiply(0), equalTo(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.MANY.multiply(between(1, Integer.MAX_VALUE)), equalTo(CardinalityUpperBound.MANY));
assertThat(CardinalityUpperBound.MANY.multiply(0), sameInstance(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.MANY.multiply(between(1, Integer.MAX_VALUE)), sameInstance(CardinalityUpperBound.MANY));
}

public void testManyMap() {
    // MANY reports Integer.MAX_VALUE to the mapper.
    int mapped = CardinalityUpperBound.MANY.map(i -> i);
    assertThat(mapped, equalTo(Integer.MAX_VALUE));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,9 @@ public void testSubAggCollectsFromManyBucketsIfManyRanges() throws IOException {
simpleTestCase(aggregationBuilder, new MatchAllDocsQuery(), range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
InternalAggCardinality pc = ranges.get(0).getAggregations().get("c");
assertThat(pc.cardinality(), equalTo(CardinalityUpperBound.MANY));
assertThat(pc.cardinality().map(i -> i), equalTo(2));
pc = ranges.get(1).getAggregations().get("c");
assertThat(pc.cardinality(), equalTo(CardinalityUpperBound.MANY));
assertThat(pc.cardinality().map(i -> i), equalTo(2));
});
}

Expand Down

0 comments on commit fb84c1f

Please sign in to comment.