-
Notifications
You must be signed in to change notification settings - Fork 24.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize the composite aggregation for match_all and range queries #28745
Changes from 20 commits
d46143c
6a8e866
0bc679e
0581226
8fd25e1
0d32ab0
32f0904
0634551
a7f8ffe
35c017e
841118c
6445f23
4437f2e
cdba4c2
93e3345
cc6539c
fc91434
f9d1eeb
f2588f2
eb61b02
f22dd2a
8bf9703
e58e540
1d71d98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,6 +99,7 @@ setup: | |
- do: | ||
search: | ||
index: test | ||
allow_partial_search_results: false | ||
body: | ||
aggregations: | ||
test: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.search.aggregations.bucket.composite; | ||
|
||
import org.apache.lucene.index.IndexReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.search.MatchAllDocsQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.elasticsearch.common.CheckedFunction; | ||
import org.elasticsearch.index.fielddata.SortedBinaryDocValues; | ||
import org.elasticsearch.index.mapper.MappedFieldType; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollector; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* A {@link SingleDimensionValuesSource} for binary source ({@link BytesRef}). | ||
*/ | ||
class BinaryValuesSource extends SingleDimensionValuesSource<BytesRef> { | ||
private final CheckedFunction<LeafReaderContext, SortedBinaryDocValues, IOException> docValuesFunc; | ||
private final BytesRef[] values; | ||
private BytesRef currentValue; | ||
|
||
/** | ||
 * Creates a binary values source with {@code size} value slots. | ||
 * | ||
 * @param fieldType     forwarded unchanged to the superclass constructor | ||
 * @param docValuesFunc function applied to a leaf context to obtain that leaf's {@link SortedBinaryDocValues} | ||
 * @param size          number of slots allocated for stored values (also forwarded to the superclass) | ||
 * @param reverseMul    factor multiplied into every comparison result (also forwarded to the superclass) | ||
 */ | ||
BinaryValuesSource( | ||
        MappedFieldType fieldType, | ||
        CheckedFunction<LeafReaderContext, SortedBinaryDocValues, IOException> docValuesFunc, | ||
        int size, | ||
        int reverseMul) { | ||
    super(fieldType, size, reverseMul); | ||
    this.values = new BytesRef[size]; | ||
    this.docValuesFunc = docValuesFunc; | ||
} | ||
|
||
@Override | ||
String type() { | ||
return "binary"; | ||
} | ||
|
||
/** | ||
 * Stores a deep copy of the current value in the given slot. | ||
 * The copy is made with {@link BytesRef#deepCopyOf(BytesRef)}, so the slot | ||
 * holds its own bytes rather than a reference to the current value. | ||
 * | ||
 * @param slot index of the slot to overwrite | ||
 */ | ||
@Override | ||
public void copyCurrent(int slot) { | ||
    final BytesRef snapshot = BytesRef.deepCopyOf(currentValue); | ||
    values[slot] = snapshot; | ||
} | ||
|
||
/** | ||
 * Compares the values stored in two slots. | ||
 * | ||
 * @param from index of the slot holding the left-hand value | ||
 * @param to   index of the slot holding the right-hand value | ||
 * @return the result of {@code compareValues} for the two stored values | ||
 */ | ||
@Override | ||
public int compare(int from, int to) { | ||
    final BytesRef left = values[from]; | ||
    final BytesRef right = values[to]; | ||
    return compareValues(left, right); | ||
} | ||
|
||
@Override | ||
int compareCurrent(int slot) { | ||
return compareValues(currentValue, values[slot]); | ||
} | ||
|
||
@Override | ||
int compareCurrentWithAfter() { | ||
return compareValues(currentValue, afterValue); | ||
} | ||
|
||
/** | ||
 * Compares two values with {@link BytesRef#compareTo(BytesRef)} and scales the | ||
 * result by {@code reverseMul}. | ||
 * | ||
 * @param v1 left-hand value; must not be null | ||
 * @param v2 right-hand value | ||
 * @return {@code v1.compareTo(v2)} multiplied by {@code reverseMul} | ||
 */ | ||
int compareValues(BytesRef v1, BytesRef v2) { | ||
    final int natural = v1.compareTo(v2); | ||
    return natural * reverseMul; | ||
} | ||
||
@Override | ||
void setAfter(Comparable<?> value) { | ||
if (value.getClass() == BytesRef.class) { | ||
afterValue = (BytesRef) value; | ||
} else if (value.getClass() == String.class) { | ||
afterValue = new BytesRef((String) value); | ||
} else { | ||
throw new IllegalArgumentException("invalid value, expected string, got " + value.getClass().getSimpleName()); | ||
} | ||
} | ||
|
||
@Override | ||
BytesRef toComparable(int slot) { | ||
return values[slot]; | ||
} | ||
|
||
// Builds a collector for one leaf: for each collected doc that has values, every value is | ||
// published in turn through currentValue and the doc is forwarded to next once per value. | ||
@Override | ||
LeafBucketCollector getLeafCollector(LeafReaderContext context, LeafBucketCollector next) throws IOException { | ||
// Doc values are resolved once per leaf, not once per document. | ||
final SortedBinaryDocValues dvs = docValuesFunc.apply(context); | ||
return new LeafBucketCollector() { | ||
@Override | ||
public void collect(int doc, long bucket) throws IOException { | ||
// Docs for which advanceExact returns false are not forwarded to next. | ||
if (dvs.advanceExact(doc)) { | ||
int num = dvs.docValueCount(); | ||
for (int i = 0; i < num; i++) { | ||
// currentValue is overwritten on every iteration, so it only reflects the value | ||
// belonging to the next.collect call that immediately follows. | ||
currentValue = dvs.nextValue(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this means currentValue will always be the higher value in case of a multi-valued field, is that ok? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. currentValue is only valid for the current composite bucket, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, thanks. |
||
next.collect(doc, bucket); | ||
} | ||
} | ||
} | ||
}; | ||
} | ||
|
||
// Builds a collector pinned to a single value: every collected doc is forwarded to next | ||
// with currentValue set to that value. The context argument and docValuesFunc are not used here. | ||
@Override | ||
LeafBucketCollector getLeafCollector(Comparable<?> value, LeafReaderContext context, LeafBucketCollector next) { | ||
// Only an exact BytesRef is accepted; any other class is rejected with IllegalArgumentException. | ||
if (value.getClass() != BytesRef.class) { | ||
throw new IllegalArgumentException("Expected BytesRef, got " + value.getClass()); | ||
} | ||
final BytesRef filterValue = (BytesRef) value; | ||
return new LeafBucketCollector() { | ||
@Override | ||
public void collect(int doc, long bucket) throws IOException { | ||
// The same filterValue reference is re-assigned on each call before delegating. | ||
currentValue = filterValue; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need to set it for every doc? |
||
next.collect(doc, bucket); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
SortedDocsProducer createSortedDocsProducerOrNull(IndexReader reader, Query query) { | ||
if (checkIfSortedDocsIsApplicable(reader, fieldType) == false || | ||
(query != null && query.getClass() != MatchAllDocsQuery.class)) { | ||
return null; | ||
} | ||
return new TermsSortedDocsProducer(fieldType.name()); | ||
} | ||
|
||
@Override | ||
public void close() {} | ||
} |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we need to accept both BytesRef and String?