diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index 0cef59a464047..10be8f5ad317d 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -82,6 +82,8 @@ tasks.named("yamlRestTestV7CompatTransform").configure { task -> task.skipTest("search.aggregation/20_terms/string profiler via map", "The profiler results aren't backwards compatible.") task.skipTest("search.aggregation/20_terms/numeric profiler", "The profiler results aren't backwards compatible.") + task.skipTest("msearch/20_typed_keys/Multisearch test with typed_keys parameter", "Test contains a no longer valid work around for the float range bug") + task.replaceValueInMatch("_type", "_doc") task.addAllowedWarningRegex("\\[types removal\\].*") task.replaceValueInMatch("nodes.\$node_id.roles.8", "ml", "node_info role test") diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/msearch/20_typed_keys.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/msearch/20_typed_keys.yml index 0be04fd01c0ed..89bc8bf53b8c7 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/msearch/20_typed_keys.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/msearch/20_typed_keys.yml @@ -64,7 +64,7 @@ setup: - index: test-* - {query: {match: {bool: true} }, size: 0, aggs: {test_filter: {filter: {range: {integer: {gte: 20} } } } } } - index: test-1 - - {query: {match_all: {} }, size: 0, aggs: {test_range: {range: {field: float, ranges: [ {to: 19.2499999}, {from: 19.25} ] } } } } + - {query: {match_all: {} }, size: 0, aggs: {test_range: {range: {field: float, ranges: [ {to: 19.25}, {from: 19.25} ] } } } } - index: test-* - {query: {bool: {filter: {range: {row: {lt: 5}}} } }, size: 0, aggs: {test_percentiles: {percentiles: {field: float} } } } # Testing suggesters @@ -78,7 +78,7 @@ setup: - match: { responses.0.hits.total: 3 } - match: { responses.0.aggregations.filter#test_filter.doc_count : 2 } - match: { responses.1.hits.total: 3 } - - match: { responses.1.aggregations.range#test_range.buckets.0.key : "*-19.2499999" } + - match: { responses.1.aggregations.range#test_range.buckets.0.key : "*-19.25" } - match: { responses.1.aggregations.range#test_range.buckets.0.doc_count : 2 } - match: { responses.1.aggregations.range#test_range.buckets.1.key : "19.25-*" } - match: { responses.1.aggregations.range#test_range.buckets.1.doc_count : 1 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/40_range.yml index afe66e4340d3b..60e78927c25fe 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -11,6 +11,10 @@ setup: type: double long: type: long + float: + type: float + half_float: + type: half_float - do: cluster.health: @@ -22,15 +26,71 @@ setup: refresh: true body: - {"index": {}} - - { "double" : 42.1, "long": 25 } + - { "double" : 42.1, "long": 25, "float": 0.01, "half_float": 0.01 } - {"index": {}} - - { "double" : 100.7, "long": 80 } + - { "double" : 100.7, "long": 80, "float": 0.03, "half_float": 0.0152 } - {"index": {}} - - { "double" : 50.5, "long": 75} + - { "double" : 50.5, "long": 75, "float": 0.04, "half_float": 0.04 } # For testing missing values - {"index": {}} - {} +--- +"Float Endpoint Exclusive": + - skip: + version: " - 7.99.99" + reason: Bug fixed in 7.16.0 (backport pending) + - do: + search: + body: + size: 0 + aggs: + double_range: + range: + format: "0.0#" + field: "float" + ranges: + - + from: 0 + to: 0.04 + - from: 0.04 + to: 1.0 + - match: { hits.total.relation: "eq" } + - match: { hits.total.value: 4 } + - length: { aggregations.double_range.buckets: 2 } + - match: { aggregations.double_range.buckets.0.key: "0.0-0.04" } + - match: { aggregations.double_range.buckets.0.doc_count: 2 } + - match: { aggregations.double_range.buckets.1.key: "0.04-1.0" } + - match: { aggregations.double_range.buckets.1.doc_count: 1 } + +--- +"Half Float Endpoint Exclusive": + - skip: + version: " - 7.99.99" + reason: Bug fixed in 7.16.0 (backport pending) + - do: + search: + body: + size: 0 + aggs: + double_range: + range: + format: "0.0###" + field: "half_float" + ranges: + - + from: 0 + to: 0.0152 + - from: 0.0152 + to: 1.0 + - match: { hits.total.relation: "eq" } + - match: { hits.total.value: 4 } + - length: { aggregations.double_range.buckets: 2 } + - match: { aggregations.double_range.buckets.0.key: "0.0-0.0152" } + - match: { aggregations.double_range.buckets.0.doc_count: 1 } + - match: { aggregations.double_range.buckets.1.key: "0.0152-1.0" } + - match: { aggregations.double_range.buckets.1.doc_count: 2 } + --- "Double range": - do: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index 7ae747dae0b2d..7f5c3adbc94d0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -1040,6 +1040,21 @@ public String typeName() { return type.name; } + /** + * This method reinterprets a double precision value based on the maximum precision of the stored number field. Mostly this + * corrects for unrepresentable values which have different approximations when cast from floats than when parsed as doubles. + * It may seem strange to convert a double to a double, and it is. This function's goal is to reduce the precision + * on the double in the case that the backing number type would have parsed the value differently. This is to address + * the problem where (e.g.) 0.04F < 0.04D, which causes problems for range aggregations. + */ + public double reduceToStoredPrecision(double value) { + if (Double.isInfinite(value)) { + // Trying to parse infinite values into ints/longs throws. Understandably. + return value; + } + return type.parse(value, false).doubleValue(); + } + public NumericType numericType() { return type.numericType(); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregationBuilder.java index c35dda2b4cf26..e3c3c56729600 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregationBuilder.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.function.DoubleUnaryOperator; public class RangeAggregationBuilder extends AbstractRangeBuilder { public static final String NAME = "range"; @@ -152,12 +153,18 @@ protected RangeAggregatorFactory innerBuild( ) throws IOException { RangeAggregatorSupplier aggregatorSupplier = context.getValuesSourceRegistry().getAggregator(REGISTRY_KEY, config); + /* + This will downgrade the precision of the range bounds to match the field's precision. Fixes float/double issues, but not + long/double issues. See https://github.com/elastic/elasticsearch/issues/77033 + */ + DoubleUnaryOperator fixPrecision = config.reduceToStoredPrecisionFunction(); + // We need to call processRanges here so they are parsed before we make the decision of whether to cache the request Range[] ranges = processRanges(range -> { DocValueFormat parser = config.format(); assert parser != null; - Double from = range.from; - Double to = range.to; + Double from = fixPrecision.applyAsDouble(range.from); + Double to = fixPrecision.applyAsDouble(range.to); if (range.fromAsStr != null) { from = parser.parseDouble(range.fromAsStr, false, context::nowInMillis); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java index 687c73ebb1e1d..7483419872ef3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java @@ -13,6 +13,7 @@ import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.RangeFieldMapper; import org.elasticsearch.script.AggregationScript; import org.elasticsearch.script.Script; @@ -20,6 +21,7 @@ import java.io.IOException; import java.time.ZoneId; +import java.util.function.DoubleUnaryOperator; import java.util.function.Function; /** @@ -321,6 +323,17 @@ public FieldContext fieldContext() { return fieldContext; } + /** + * Returns a function from the mapper that adjusts a double value to the value it would have been had it been parsed by that mapper + * and then cast up to a double. Used to correct precision errors. + */ + public DoubleUnaryOperator reduceToStoredPrecisionFunction() { + if (fieldContext() != null && fieldType() instanceof NumberFieldMapper.NumberFieldType) { + return ((NumberFieldMapper.NumberFieldType) fieldType())::reduceToStoredPrecision; + } + return (value) -> value; + } + /** * Convenience method for looking up the mapped field type backing this values source, if it exists. */ diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 989878a84c41c..10605c83c6540 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -13,14 +13,11 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; @@ -103,6 +100,69 @@ public void testMatchesNumericDocValues() throws IOException { }); } + /** + * Confirm that a non-representable decimal stored as a double correctly follows the half-open interval rule + */ + public void testDoubleRangesExclusiveEndpoint() throws IOException { + final String fieldName = "double"; + MappedFieldType field = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.DOUBLE); + testCase( + new RangeAggregationBuilder("range").field(fieldName).addRange("r1", 0, 0.04D).addRange("r2", 0.04D, 1.0D), + new MatchAllDocsQuery(), + iw -> { iw.addDocument(List.of(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(0.04D)))); }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals(0, ranges.get(0).getDocCount()); + assertEquals(1, ranges.get(1).getDocCount()); + }, + field + ); + } + + /** + * Confirm that a non-representable decimal stored as a float correctly follows the half-open interval rule + */ + public void testFloatRangesExclusiveEndpoint() throws IOException { + final String fieldName = "float"; + MappedFieldType field = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.FLOAT); + testCase( + new RangeAggregationBuilder("range").field(fieldName).addRange("r1", 0, 0.04D).addRange("r2", 0.04D, 1.0D), + new MatchAllDocsQuery(), + iw -> { iw.addDocument(NumberType.FLOAT.createFields(fieldName, 0.04F, false, true, false)); }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals(0, ranges.get(0).getDocCount()); + assertEquals(1, ranges.get(1).getDocCount()); + }, + field + ); + } + + /** + * Confirm that a non-representable decimal stored as a half_float correctly follows the half-open interval rule + */ + public void testHalfFloatRangesExclusiveEndpoint() throws IOException { + final String fieldName = "halfFloat"; + MappedFieldType field = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.HALF_FLOAT); + testCase( + new RangeAggregationBuilder("range").field(fieldName).addRange("r1", 0, 0.0152D).addRange("r2", 0.0152D, 1.0D), + new MatchAllDocsQuery(), + iw -> { iw.addDocument(NumberType.HALF_FLOAT.createFields(fieldName, 0.0152F, false, true, false)); }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals(0, ranges.get(0).getDocCount()); + assertEquals(1, ranges.get(1).getDocCount()); + }, + field + ); + } + public void testUnboundedRanges() throws IOException { testCase( new RangeAggregationBuilder("name").field(NUMBER_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), @@ -174,7 +234,7 @@ public void testDateFieldMillisecondResolution() throws IOException { testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1), new LongPoint(DATE_FIELD_NAME, milli1))); iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2), new LongPoint(DATE_FIELD_NAME, milli2))); - }, range -> { + }, (Consumer>) range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); assertEquals(1, ranges.get(0).getDocCount()); @@ -205,7 +265,7 @@ public void testDateFieldNanosecondResolution() throws IOException { testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli1)))); iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli2)))); - }, range -> { + }, (Consumer>) range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); assertEquals(1, ranges.get(0).getDocCount()); @@ -239,7 +299,7 @@ public void testMissingDateWithDateNanosField() throws IOException { iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli2)))); // Missing will apply to this document iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7))); - }, range -> { + }, (Consumer>) range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); assertEquals(2, ranges.get(0).getDocCount()); @@ -273,7 +333,7 @@ public void testNotFitIntoDouble() throws IOException { for (long l = start; l < start + 150; l++) { iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, l), new LongPoint(NUMBER_FIELD_NAME, l))); } - }, range -> { + }, (Consumer>) range -> { List ranges = range.getBuckets(); assertThat(ranges, hasSize(3)); // If we had a native `double` range aggregator we'd get 50, 50, 50 @@ -305,7 +365,7 @@ public void testUnmappedWithMissingNumber() throws IOException { testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 1))); - }, range -> { + }, (Consumer>) range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); assertEquals(2, ranges.get(0).getDocCount()); @@ -542,31 +602,4 @@ private void simpleTestCase( iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3))); }, verify, fieldType); } - - private void testCase( - RangeAggregationBuilder aggregationBuilder, - Query query, - CheckedConsumer buildIndex, - Consumer>> verify, - MappedFieldType fieldType - ) throws IOException { - try (Directory directory = newDirectory()) { - RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); - buildIndex.accept(indexWriter); - indexWriter.close(); - - try (IndexReader indexReader = DirectoryReader.open(directory)) { - IndexSearcher indexSearcher = newSearcher(indexReader, true, true); - - InternalRange> agg = searchAndReduce( - indexSearcher, - query, - aggregationBuilder, - fieldType - ); - verify.accept(agg); - - } - } - } }