Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch up fetching dates from source #70040

Merged
merged 10 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ public static ZoneId of(String zoneId) {
return ZoneId.of(zoneId).normalized();
}

static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
/**
* The maximum nanosecond resolution date we can properly handle.
*/
public static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ public static XContentParser createParser(NamedXContentRegistry xContentRegistry

/**
* Converts the given bytes into a map that is optionally ordered.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
* @deprecated this method relies on auto-detection of content type. Use {@link #convertToMap(BytesReference, boolean, XContentType)}
* instead with the proper {@link XContentType}
*/
Expand All @@ -88,6 +94,12 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen

/**
* Converts the given bytes into a map that is optionally ordered. The provided {@link XContentType} must be non-null.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
*/
public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType)
throws ElasticsearchParseException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.text.NumberFormat;
import java.time.DateTimeException;
import java.time.Instant;
import java.time.ZoneId;
Expand Down Expand Up @@ -345,6 +346,23 @@ public long parse(String value) {
return resolution.convert(DateFormatters.from(dateTimeFormatter().parse(value), dateTimeFormatter().locale()).toInstant());
}

/**
 * Format to use to resolve {@link Number}s from the source. It's valid
 * to send the numbers with up to six digits after the decimal place
 * and we'll parse them as {@code millis.nanos}. The source
 * deserialization code isn't particularly careful here and can return
 * a {@code double} instead of the exact string in the {@code _source}.
 * So we have to *get* that string.
 * <p>
 * Nik chose not to use {@link String#format} for this because it feels
 * a little wasteful. It'd probably be fine but this makes Nik feel a
 * little less bad about the {@code instanceof} and the string allocation.
 * <p>
 * NOTE(review): {@link NumberFormat} instances are not thread-safe; if
 * {@code valueFetcher} can run concurrently on multiple threads this
 * shared instance needs guarding — TODO confirm the calling context.
 */
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(Locale.ROOT);
static {
// No grouping separators ("1234567", not "1,234,567") and at most six
// fraction digits so the formatted value parses as millis.nanos.
NUMBER_FORMAT.setGroupingUsed(false);
NUMBER_FORMAT.setMaximumFractionDigits(6);
}
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
DateFormatter defaultFormatter = dateTimeFormatter();
Expand All @@ -355,7 +373,8 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
return new SourceValueFetcher(name(), context, nullValue) {
@Override
public String parseSourceValue(Object value) {
String date = value.toString();
String date = value instanceof Number ? NUMBER_FORMAT.format(value) : value.toString();
// TODO can we emit a warning if we're losing precision here? I'm not sure we can.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will be used for each document in the request, so just calling HeaderWarning.addWarning(formattedMessage); would probably spam the response headers.
I am not sure we have a convenient way of doing this. The code could look like this...

 static Logger l = LogManager.getLogger("dateFieldMapperHeader");

    static {
        final LoggerContext context = (LoggerContext) LogManager.getContext(false);
        final Configuration configuration = context.getConfiguration();


        RateLimitingFilter rateLimitingFilter = new RateLimitingFilter();
        HeaderWarningAppender dateFieldMapperHeaderAppender = new HeaderWarningAppender("dateFieldMapperHeaderAppender", rateLimitingFilter);
        Loggers.addAppender(LogManager.getLogger("dateFieldMapperHeader"), dateFieldMapperHeaderAppender);


    }

and then a usage

                public String parseSourceValue(Object value) {
                    l.info(new ESLogMessage("someMessage").with(DeprecatedMessage.KEY_FIELD_NAME, someCleverSearchRequestUUID));

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah! I was more thinking about how to detect that we're losing precision. I think it's probably safest to leave it for a follow up change.

long timestamp = parse(date);
ZonedDateTime dateTime = resolution().toInstant(timestamp).atZone(ZoneOffset.UTC);
return formatter.format(dateTime);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ public int docId() {

/**
* Return the source as a map that will be unchanged when the lookup
* moves to a different document
* moves to a different document.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
*/
public Map<String, Object> source() {
if (source != null) {
Expand Down Expand Up @@ -84,6 +90,15 @@ private static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(Bytes
return XContentHelper.convertToMap(source, false);
}

/**
 * Convert the raw {@code _source} bytes into a {@link Map} of plain java
 * objects.
 * <p>
 * Important: this conversion can lose precision on numbers that carry a
 * decimal point. A value such as {@code "n": 1234.567} comes back as a
 * {@code double}, whose mantissa holds only 52 bits. That bites most
 * often when folks encode nanosecond precision dates as decimal numbers.
 */
public static Map<String, Object> sourceAsMap(BytesReference source) throws ElasticsearchParseException {
    Tuple<XContentType, Map<String, Object>> typeAndMap = sourceAsMapAndType(source);
    return typeAndMap.v2();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.math.BigDecimal;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.List;
import java.util.Locale;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -356,4 +358,148 @@ public void testResolutionRounding() {
assertThat(up, equalTo(0L));
}
}

/**
 * The max date, in milliseconds since epoch, that the iso8601 format can
 * parse. It'll happily <em>format</em> much larger dates; it just can't
 * parse them back.
 */
private static final long MAX_ISO_DATE = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("9999-12-12T23:59:59.999Z");

public void testFetchMillis() throws IOException {
    // No explicit format: fetching falls back to the field's own formatter.
    long millis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", millis, null);
}

public void testFetchMillisFromMillisFormatted() throws IOException {
    long millis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", millis, "epoch_millis");
}

public void testFetchMillisFromMillisFormattedIso8601() throws IOException {
    // Index an epoch-millis number but fetch it rendered as iso8601.
    long millis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", millis, "iso8601");
}

public void testFetchMillisFromIso8601() throws IOException {
    // Index an iso8601 string; stay inside the range iso8601 can parse.
    String date = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomLongBetween(0, MAX_ISO_DATE));
    assertFetch(dateMapperService(), "field", date, "iso8601");
}

public void testFetchMillisFromIso8601Nanos() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", nanoDate, null);
}

public void testFetchMillisFromIso8601NanosFormatted() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", nanoDate, "strict_date_optional_time_nanos");
}

/**
 * Tests round tripping a date with nanosecond resolution through doc
 * values and field fetching via the {@code date} field. We expect this to
 * lose precision because the {@code date} field only supports millisecond
 * resolution. But it's important that both paths lose precision in the
 * same way.
 */
public void testFetchMillisFromRoundedNanos() throws IOException {
assertFetch(dateMapperService(), "field", randomDecimalNanos(MAX_ISO_DATE), null);
}

/**
 * Tests round tripping a date with nanosecond resolution through doc
 * values and field fetching via the {@code date} field with a specific
 * format. We expect this to lose precision because the {@code date}
 * field only supports millisecond resolution. But it's important that
 * both paths lose precision in the same way.
 */
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/70085") // Fails about 1/1000 of the time because of rounding.
public void testFetchMillisFromFixedNanos() throws IOException {
assertFetch(dateMapperService(), "field", new BigDecimal(randomDecimalNanos(MAX_ISO_DATE)), null);
}

/**
 * Builds a {@link MapperService} with a single millisecond resolution
 * {@code date} field named {@code field}.
 */
private MapperService dateMapperService() throws IOException {
    return createMapperService(mapping(b -> {
        b.startObject("field");
        b.field("type", "date");
        b.endObject();
    }));
}

/**
 * The maximum valid nanosecond-resolution date, expressed in milliseconds
 * since epoch, taken from {@link DateUtils#MAX_NANOSECOND_INSTANT}.
 */
private static final long MAX_NANOS = DateUtils.MAX_NANOSECOND_INSTANT.toEpochMilli();

public void testFetchNanos() throws IOException {
    // No explicit format: fetching falls back to the field's own formatter.
    long millis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", millis, null);
}

public void testFetchNanosFromMillisFormatted() throws IOException {
    long millis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", millis, "epoch_millis");
}

public void testFetchNanosFromMillisFormattedIso8601() throws IOException {
    // Index an epoch-millis number but fetch it rendered as iso8601.
    long millis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", millis, "iso8601");
}

public void testFetchNanosFromIso8601Nanos() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", nanoDate, null);
}

public void testFetchNanosFromIso8601NanosFormatted() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", nanoDate, "strict_date_optional_time_nanos");
}

public void testFetchNanosFromRoundedNanos() throws IOException {
    String decimalDate = randomDecimalNanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", decimalDate, null);
}

/**
 * Maximum date we can round trip through {@code date_nanos} without
 * losing precision right now. We hope to be able to make this
 * {@link #MAX_NANOS} soon.
 * <p>
 * Given the maximum precise value for a double (9,007,199,254,740,992,
 * i.e. 2^53) I'd expect this to be 1970-04-15T05:59:59.253Z but that
 * causes errors. I'm curious about why but not curious enough to track
 * it down.
 */
private static final long MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(
"1970-04-10T00:00:00.000Z"
);

/**
 * Round trips a nanosecond resolution date through doc values and field
 * fetching on a {@code date_nanos} field without any explicit format.
 */
public void testFetchNanosFromFixedNanos() throws IOException {
    String decimalDate = randomDecimalNanos(MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION);
    assertFetch(dateNanosMapperService(), "field", new BigDecimal(decimalDate), null);
}

/**
 * Round trips a nanosecond resolution date through doc values and field
 * fetching on a {@code date_nanos} field when a format is supplied.
 */
public void testFetchNanosFromFixedNanosFormatted() throws IOException {
    String decimalDate = randomDecimalNanos(MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION);
    assertFetch(dateNanosMapperService(), "field", new BigDecimal(decimalDate), "strict_date_optional_time_nanos");
}

/**
 * Builds a {@link MapperService} with a single nanosecond resolution
 * {@code date_nanos} field named {@code field}.
 */
private MapperService dateNanosMapperService() throws IOException {
    return createMapperService(mapping(b -> {
        b.startObject("field");
        b.field("type", "date_nanos");
        b.endObject();
    }));
}

/**
 * A random iso8601 date with nanosecond precision, built by appending six
 * random sub-millisecond digits to a random millisecond precision date
 * no later than {@code maxMillis}.
 */
private String randomIs8601Nanos(long maxMillis) {
    String millisDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomLongBetween(0, maxMillis));
    String withoutZone = millisDate.substring(0, millisDate.length() - 1); // drop the trailing "Z"
    String subMillis = String.format(Locale.ROOT, "%06d", between(0, 999999));
    return withoutZone + subMillis + "Z";
}

/**
 * A random date written as a decimal number: whole milliseconds before
 * the point, up to six random fraction digits after it.
 */
private String randomDecimalNanos(long maxMillis) {
    long millis = randomLongBetween(0, maxMillis);
    int fraction = between(0, 999999);
    return millis + "." + fraction;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static java.util.stream.Collectors.toList;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -458,4 +461,26 @@ public void testSplitQueriesOnWhitespace() throws IOException {
new String[] { "hello world" }
);
}

public void testFetch() throws IOException {
    String value = randomAlphaOfLength(5);
    assertFetch(keywordMapperService(), "field", value, null);
}

public void testFetchMany() throws IOException {
    /*
     * Doc values sort and deduplicate multi-valued fields while source
     * fetching does not, so feed both paths values that are already
     * sorted and unique.
     */
    int count = between(2, 10);
    Set<String> unique = new HashSet<>();
    while (unique.size() < count) {
        unique.add(randomAlphaOfLength(5));
    }
    List<String> sortedValues = unique.stream().sorted().collect(toList());
    assertFetch(keywordMapperService(), "field", sortedValues, null);
}

/**
 * Builds a {@link MapperService} with a single {@code keyword} field
 * named {@code field}.
 */
private MapperService keywordMapperService() throws IOException {
    return createMapperService(mapping(b -> {
        b.startObject("field");
        b.field("type", "keyword");
        b.endObject();
    }));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,24 @@
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Supplier;

import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
* Base class for testing {@link Mapper}s.
Expand Down Expand Up @@ -452,4 +456,29 @@ public final void testTextSearchInfoConsistency() throws IOException {
protected void assertSearchable(MappedFieldType fieldType) {
assertEquals(fieldType.isSearchable(), fieldType.getTextSearchInfo() != TextSearchInfo.NONE);
}

/**
 * Assert that fetching a value using {@link MappedFieldType#valueFetcher}
 * produces the same value as fetching using doc values.
 *
 * @param mapperService mapper service whose mapping declares {@code field}
 * @param field name of the field to index and fetch
 * @param value the value to index into {@code field}
 * @param format fetch-time format, or {@code null} for the field's default
 * @throws IOException if indexing or reading the Lucene index fails
 */
protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException {
MappedFieldType ft = mapperService.fieldType(field);
SourceToParse source = source(b -> b.field(ft.name(), value));
// Fetch path 1: through doc values, rendered with the field's doc value format.
ValueFetcher docValueFetcher = new DocValueFetcher(
ft.docValueFormat(format, null),
ft.fielddataBuilder("test", () -> null).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService())
);
// Fetch path 2: the field's own fetcher reading from _source. Mock the
// context so the source path resolves to exactly this one field.
SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class);
when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field));
ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format);
ParsedDocument doc = mapperService.documentMapper().parse(source);
withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> {
// Single document indexed, so it lives in the first (only) leaf at doc 0.
SourceLookup sourceLookup = new SourceLookup();
sourceLookup.setSegmentAndDocument(ir.leaves().get(0), 0);
docValueFetcher.setNextReader(ir.leaves().get(0));
List<?> fromDocValues = docValueFetcher.fetchValues(sourceLookup);
List<?> fromNative = nativeFetcher.fetchValues(sourceLookup);
// Both fetch paths must agree, including any precision loss.
assertEquals(fromDocValues, fromNative);
});
}
}