Skip to content

Commit

Permalink
Add scripting, supported-type tests to ValueCount (#53500)
Browse files Browse the repository at this point in the history
Also adds a few small notes to the documentation regarding potentially
unintuitive behavior
  • Loading branch information
polyfractal authored Mar 16, 2020
1 parent 4a5feab commit 84a59f8
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ These values can be extracted either from specific fields in the documents, or b
this aggregator will be used in conjunction with other single-value aggregations. For example, when computing the `avg`
one might be interested in the number of values the average is computed over.

`value_count` does not de-duplicate values, so even if a field has duplicates (or a script generates multiple
identical values for a single document), each value will be counted individually.

[source,console]
--------------------------------------------------
POST /sales/_search?size=0
Expand Down Expand Up @@ -77,3 +80,7 @@ POST /sales/_search?size=0
}
--------------------------------------------------
// TEST[setup:sales,stored_example_script]

NOTE: Because `value_count` is designed to work with any field it internally treats all values as simple bytes.
Due to this implementation, if the `_value` script variable is used to fetch a value instead of accessing the field
directly (e.g. a "value script"), the field value will be returned as a string instead of its native format.
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
package org.elasticsearch.search.aggregations.metrics;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
Expand All @@ -35,6 +37,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.BooleanFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
Expand All @@ -44,21 +47,70 @@
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.index.mapper.RangeType;
import org.elasticsearch.script.MockScriptEngine;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.script.ScriptModule;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;

import static java.util.Collections.singleton;

public class ValueCountAggregatorTests extends AggregatorTestCase {

/** Name of the field that the script tests index into and aggregate on. */
private static final String FIELD_NAME = "field";

/**
 * Name of the mock script that reads the {@code _value} variable as a string,
 * parses it as a double and returns that number plus one.
 */
private static final String VALUE_SCRIPT = "_value";
/** Name of the mock script that ignores its variables and always returns {@code 1}. */
private static final String SINGLE_SCRIPT = "single";

/**
 * Builds a value_count aggregation named "foo" over {@code fieldName}.
 * The supplied {@code fieldType} is not consulted.
 */
@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
    ValueCountAggregationBuilder valueCount = new ValueCountAggregationBuilder("foo", null);
    return valueCount.field(fieldName);
}

/** Lists the values-source types this test declares as supported by value_count. */
@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
    List<ValuesSourceType> supportedTypes = List.of(
        CoreValuesSourceType.NUMERIC,
        CoreValuesSourceType.BYTES,
        CoreValuesSourceType.GEOPOINT,
        CoreValuesSourceType.RANGE,
        CoreValuesSourceType.HISTOGRAM
    );
    return supportedTypes;
}

/**
 * Creates a script service backed by a single mock engine that knows two scripts:
 * {@code VALUE_SCRIPT} (parses {@code _value} as a double and adds one) and
 * {@code SINGLE_SCRIPT} (always returns 1).
 */
@Override
protected ScriptService getMockScriptService() {
    Map<String, Function<Map<String, Object>, Object>> mockScripts = new HashMap<>();
    mockScripts.put(SINGLE_SCRIPT, params -> 1);
    mockScripts.put(VALUE_SCRIPT, params -> {
        // _value is handed to the script as a String here, hence the cast before parsing
        String rawValue = (String) params.get("_value");
        return Double.valueOf(rawValue) + 1;
    });

    MockScriptEngine engine = new MockScriptEngine(MockScriptEngine.NAME, mockScripts, Collections.emptyMap());
    Map<String, ScriptEngine> enginesByType = Collections.singletonMap(engine.getType(), engine);

    return new ScriptService(Settings.EMPTY, enginesByType, ScriptModule.CORE_CONTEXTS);
}

public void testNoDocs() throws IOException {
for (ValueType valueType : ValueType.values()) {
testCase(new MatchAllDocsQuery(), valueType, iw -> {
Expand Down Expand Up @@ -185,6 +237,105 @@ public void testRangeFieldValues() throws IOException {
}, fieldType);
}

/**
 * Runs value_count with the {@code VALUE_SCRIPT} value script over a numeric field
 * holding one value in each of three documents, and expects a count of 3.
 */
public void testValueScriptNumber() throws IOException {
    MappedFieldType numericField = createMappedFieldType(ValueType.NUMERIC);
    numericField.setName(FIELD_NAME);
    numericField.setHasDocValues(true);

    Script valueScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap());
    ValueCountAggregationBuilder builder = new ValueCountAggregationBuilder("name", null)
        .field(FIELD_NAME)
        .script(valueScript);

    testCase(builder, new MatchAllDocsQuery(), writer -> {
        writer.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, 7)));
        writer.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, 8)));
        writer.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, 9)));
    }, valueCount -> {
        assertEquals(3, valueCount.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(valueCount));
    }, numericField);
}

/**
 * Runs value_count with the constant-returning {@code SINGLE_SCRIPT} over a numeric field
 * where every document carries the same value twice.
 *
 * The script is attached explicitly; without it this test would only repeat the
 * plain field-based coverage and {@code SINGLE_SCRIPT} would never be executed.
 */
public void testSingleScriptNumber() throws IOException {
    ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null)
        .field(FIELD_NAME)
        .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, SINGLE_SCRIPT, Collections.emptyMap()));

    MappedFieldType fieldType = createMappedFieldType(ValueType.NUMERIC);
    fieldType.setName(FIELD_NAME);
    fieldType.setHasDocValues(true);

    testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
        // three documents, each holding the same numeric value twice
        for (long value : new long[] { 7, 8, 1 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField(FIELD_NAME, value));
            doc.add(new SortedNumericDocValuesField(FIELD_NAME, value));
            iw.addDocument(doc);
        }
    }, card -> {
        // note: this is 6, even though the script returns a single constant value. ValueCount does not de-dupe
        assertEquals(6, card.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(card));
    }, fieldType);
}

/**
 * Runs value_count with the {@code VALUE_SCRIPT} value script over a string field
 * holding one numeric-looking value in each of three documents, and expects a count of 3.
 */
public void testValueScriptString() throws IOException {
    MappedFieldType stringField = createMappedFieldType(ValueType.STRING);
    stringField.setName(FIELD_NAME);
    stringField.setHasDocValues(true);

    Script valueScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap());
    ValueCountAggregationBuilder builder = new ValueCountAggregationBuilder("name", null)
        .field(FIELD_NAME)
        .script(valueScript);

    testCase(builder, new MatchAllDocsQuery(), writer -> {
        writer.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef("1"))));
        writer.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef("2"))));
        writer.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef("3"))));
    }, valueCount -> {
        assertEquals(3, valueCount.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(valueCount));
    }, stringField);
}

/**
 * Runs value_count with the constant-returning {@code SINGLE_SCRIPT} over a string field
 * where every document carries two values.
 *
 * The script is attached explicitly; without it this test would only repeat the
 * plain field-based coverage and {@code SINGLE_SCRIPT} would never be executed.
 */
public void testSingleScriptString() throws IOException {
    ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null)
        .field(FIELD_NAME)
        .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, SINGLE_SCRIPT, Collections.emptyMap()));

    MappedFieldType fieldType = createMappedFieldType(ValueType.STRING);
    fieldType.setName(FIELD_NAME);
    fieldType.setHasDocValues(true);

    testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
        // Note: unlike numerics, lucene de-dupes strings so each document gets two distinct values
        String[][] valuesPerDoc = { { "1", "2" }, { "3", "4" }, { "5", "6" } };
        for (String[] values : valuesPerDoc) {
            Document doc = new Document();
            for (String value : values) {
                doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef(value)));
            }
            iw.addDocument(doc);
        }
    }, card -> {
        // note: this is 6, even though the script returns a single constant value. ValueCount does not de-dupe
        assertEquals(6, card.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(card));
    }, fieldType);
}

private void testCase(Query query,
ValueType valueType,
CheckedConsumer<RandomIndexWriter, IOException> indexer,
Expand Down

0 comments on commit 84a59f8

Please sign in to comment.