Skip to content

Commit

Permalink
Patch up fetching dates from source (backport of #70040) (#70146)
Browse files Browse the repository at this point in the history
This fixes an issue that `fields` has with dates sent in the
`###.######` format.

If you send a date in the format `#####.######` we'll parse the bit
before the decimal place as the number of milliseconds since epoch and
we'll parse the bit after the decimal as the number of nanoseconds since
the start of that millisecond. This works and is convenient for some
folks. Sadly, the code that backs the `fields` API for dates doesn't work
with the string format in this case - it works with a `double`. `double`
is bad for two reasons:
1. Its default string representation is scientific notation and our
   parsers don't know how to deal with that.
2. It loses precision relative to the string representation. `double`
   only has 52 bits of mantissa which can precisely store the number of
   nanoseconds until about 6am on April 15th, 1970. After that it starts
   to lose precision.

This fixes the first issue, getting us the correct string
representation in a "quick and dirty" way. It just converts the `double`
back to a string. But we still lose precision. Fixing that would require
a larger change.....
  • Loading branch information
nik9000 authored Mar 9, 2021
1 parent 613c303 commit fa0351c
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,10 @@ public static ZoneId of(String zoneId) {
return ZoneId.of(zoneId).normalized();
}

static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
/**
* The maximum nanosecond resolution date we can properly handle.
*/
public static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ public static XContentParser createParser(NamedXContentRegistry xContentRegistry

/**
* Converts the given bytes into a map that is optionally ordered.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
* @deprecated this method relies on auto-detection of content type. Use {@link #convertToMap(BytesReference, boolean, XContentType)}
* instead with the proper {@link XContentType}
*/
Expand All @@ -87,6 +93,12 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen

/**
* Converts the given bytes into a map that is optionally ordered. The provided {@link XContentType} must be non-null.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
*/
public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType)
throws ElasticsearchParseException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.text.NumberFormat;
import java.time.DateTimeException;
import java.time.Instant;
import java.time.ZoneId;
Expand Down Expand Up @@ -349,6 +350,23 @@ public long parse(String value) {
return resolution.convert(DateFormatters.from(dateTimeFormatter().parse(value), dateTimeFormatter().locale()).toInstant());
}

/**
 * Format to use to resolve {@link Number}s from the source. It's valid
 * to send the numbers with up to six digits after the decimal place
 * and we'll parse them as {@code millis.nanos}. The source
 * deserialization code isn't particularly careful here and can return
 * a {@code double} instead of the exact string in the {@code _source}.
 * So we have to *get* that string back from the {@code double}.
 * <p>
 * Nik chose not to use {@link String#format} for this because it feels
 * a little wasteful. It'd probably be fine but this makes Nik feel a
 * little less bad about the {@code instanceof} and the string allocation.
 * <p>
 * NOTE(review): {@link NumberFormat} instances are documented as not
 * thread-safe; sharing this static instance is only safe if value
 * fetching never formats concurrently — TODO confirm.
 */
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(Locale.ROOT);
static {
NUMBER_FORMAT.setGroupingUsed(false);
NUMBER_FORMAT.setMaximumFractionDigits(6);
}
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
DateFormatter defaultFormatter = dateTimeFormatter();
Expand All @@ -359,7 +377,8 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
return new SourceValueFetcher(name(), context, nullValue) {
@Override
public String parseSourceValue(Object value) {
String date = value.toString();
String date = value instanceof Number ? NUMBER_FORMAT.format(value) : value.toString();
// TODO can we emit a warning if we're losing precision here? I'm not sure we can.
long timestamp = parse(date);
ZonedDateTime dateTime = resolution().toInstant(timestamp).atZone(ZoneOffset.UTC);
return formatter.format(dateTime);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ public int docId() {

/**
* Return the source as a map that will be unchanged when the lookup
* moves to a different document
* moves to a different document.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
*/
public Map<String, Object> source() {
if (source != null) {
Expand Down Expand Up @@ -84,6 +90,15 @@ private static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(Bytes
return XContentHelper.convertToMap(source, false);
}

/**
 * Get the source as a {@link Map} of java objects.
 * <p>
 * Important: this can lose precision on numbers with a decimal point
 * because parsing turns values like {@code "n": 1234.567} into a
 * {@code double}, whose mantissa holds only 52 bits. That shows up
 * most frequently when folks write nanosecond precision dates as a
 * decimal number.
 */
public static Map<String, Object> sourceAsMap(BytesReference source) throws ElasticsearchParseException {
    Tuple<XContentType, Map<String, Object>> typeAndMap = sourceAsMapAndType(source);
    return typeAndMap.v2();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.bootstrap.JavaVersion;
import org.elasticsearch.common.collect.List;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.math.BigDecimal;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Locale;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -300,17 +301,15 @@ public void testIllegalFormatField() {
assertThat(e.getMessage(), containsString("Error parsing [format] on field [field]: Invalid"));
}



public void testFetchDocValuesMillis() throws IOException {
    MapperService mapperService = createMapperService(
        fieldMapping(b -> b.field("type", "date").field("format", "strict_date_time||epoch_millis"))
    );
    MappedFieldType fieldType = mapperService.fieldType("field");
    DocValueFormat docValueFormat = fieldType.docValueFormat(null, null);
    String date = "2020-05-15T21:33:02.123Z";
    // Both the string form and the raw epoch-millis long format to the same date.
    Object expected = org.elasticsearch.common.collect.List.of(date);
    assertEquals(expected, fetchFromDocValues(mapperService, fieldType, docValueFormat, date));
    assertEquals(expected, fetchFromDocValues(mapperService, fieldType, docValueFormat, 1589578382123L));
}

public void testFetchDocValuesNanos() throws IOException {
Expand All @@ -320,8 +319,11 @@ public void testFetchDocValuesNanos() throws IOException {
MappedFieldType ft = mapperService.fieldType("field");
DocValueFormat format = ft.docValueFormat(null, null);
String date = "2020-05-15T21:33:02.123456789Z";
assertEquals(List.of(date), fetchFromDocValues(mapperService, ft, format, date));
assertEquals(List.of("2020-05-15T21:33:02.123Z"), fetchFromDocValues(mapperService, ft, format, 1589578382123L));
assertEquals(org.elasticsearch.common.collect.List.of(date), fetchFromDocValues(mapperService, ft, format, date));
assertEquals(
org.elasticsearch.common.collect.List.of("2020-05-15T21:33:02.123Z"),
fetchFromDocValues(mapperService, ft, format, 1589578382123L)
);
}

public void testResolutionRounding() {
Expand Down Expand Up @@ -352,4 +354,148 @@ public void testResolutionRounding() {
assertThat(up, equalTo(0L));
}
}

/**
 * The max date, in millis since epoch, that the default iso8601
 * formatter can parse. It'll format much larger dates than it can
 * parse.
 */
private static final long MAX_ISO_DATE = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("9999-12-12T23:59:59.999Z");

/**
 * Round trips a random epoch-millis long through the {@code date}
 * field with no explicit fetch format.
 */
public void testFetchMillis() throws IOException {
    long randomMillis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", randomMillis, null);
}

/**
 * Round trips a random epoch-millis long through the {@code date}
 * field, fetching it back as {@code epoch_millis}.
 */
public void testFetchMillisFromMillisFormatted() throws IOException {
    long randomMillis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", randomMillis, "epoch_millis");
}

/**
 * Round trips a random epoch-millis long through the {@code date}
 * field, fetching it back as {@code iso8601}.
 */
public void testFetchMillisFromMillisFormattedIso8601() throws IOException {
    long randomMillis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", randomMillis, "iso8601");
}

/**
 * Round trips a random iso8601-formatted date through the {@code date}
 * field, fetching it back as {@code iso8601}.
 */
public void testFetchMillisFromIso8601() throws IOException {
    String isoDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomLongBetween(0, MAX_ISO_DATE));
    assertFetch(dateMapperService(), "field", isoDate, "iso8601");
}

/**
 * Round trips an iso8601 date with nanosecond digits through the
 * {@code date} field with no explicit fetch format.
 */
public void testFetchMillisFromIso8601Nanos() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", nanoDate, null);
}

/**
 * Round trips an iso8601 date with nanosecond digits through the
 * {@code date} field, fetching with an explicit nanos format.
 */
public void testFetchMillisFromIso8601NanosFormatted() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", nanoDate, "strict_date_optional_time_nanos");
}

/**
 * Tests round tripping a date with nanosecond resolution through doc
 * values and field fetching via the {@code date} field. We expect this
 * to lose precision because the {@code date} field only supports
 * millisecond resolution. But it's important that both paths lose
 * precision in the same way.
 */
public void testFetchMillisFromRoundedNanos() throws IOException {
    String decimalNanos = randomDecimalNanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", decimalNanos, null);
}

/**
 * Tests round tripping a nanosecond-resolution date, supplied as a
 * fixed decimal, through doc values and field fetching via the
 * {@code date} field with a specific format. We expect this to lose
 * precision because the {@code date} field only supports millisecond
 * resolution. But it's important that both paths lose precision in the
 * same way.
 */
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/70085") // Fails about 1/1000 of the time because of rounding.
public void testFetchMillisFromFixedNanos() throws IOException {
    BigDecimal fixedNanos = new BigDecimal(randomDecimalNanos(MAX_ISO_DATE));
    assertFetch(dateMapperService(), "field", fixedNanos, null);
}

/** Builds a mapper service with a single default-configured {@code date} field. */
private MapperService dateMapperService() throws IOException {
    return createMapperService(mapping(builder -> builder.startObject("field").field("type", "date").endObject()));
}

/**
 * The maximum valid nanosecond date, expressed in milliseconds since
 * epoch, derived from {@link DateUtils#MAX_NANOSECOND_INSTANT}.
 */
private static final long MAX_NANOS = DateUtils.MAX_NANOSECOND_INSTANT.toEpochMilli();

/**
 * Round trips a random epoch-millis long through the {@code date_nanos}
 * field with no explicit fetch format.
 */
public void testFetchNanos() throws IOException {
    long randomMillis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", randomMillis, null);
}

/**
 * Round trips a random epoch-millis long through the {@code date_nanos}
 * field, fetching it back as {@code epoch_millis}.
 */
public void testFetchNanosFromMillisFormatted() throws IOException {
    long randomMillis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", randomMillis, "epoch_millis");
}

/**
 * Round trips a random epoch-millis long through the {@code date_nanos}
 * field, fetching it back as {@code iso8601}.
 */
public void testFetchNanosFromMillisFormattedIso8601() throws IOException {
    long randomMillis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", randomMillis, "iso8601");
}

/**
 * Round trips an iso8601 date with nanosecond digits through the
 * {@code date_nanos} field with no explicit fetch format.
 */
public void testFetchNanosFromIso8601Nanos() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", nanoDate, null);
}

/**
 * Round trips an iso8601 date with nanosecond digits through the
 * {@code date_nanos} field, fetching with an explicit nanos format.
 */
public void testFetchNanosFromIso8601NanosFormatted() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", nanoDate, "strict_date_optional_time_nanos");
}

/**
 * Round trips a decimal {@code millis.nanos} date through the
 * {@code date_nanos} field with no explicit fetch format.
 */
public void testFetchNanosFromRoundedNanos() throws IOException {
    String decimalNanos = randomDecimalNanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", decimalNanos, null);
}

/**
 * Maximum date we can round trip through {@code date_nanos} without
 * losing precision right now. We hope to be able to make this
 * {@link #MAX_NANOS} soon.
 * <p>
 * Given the maximum precise value for a double (9,007,199,254,740,992)
 * I'd expect this to be 1970-04-15T05:59:59.253Z but that causes
 * errors. I'm curious about why but not curious enough to track it
 * down, so this uses an earlier, known-safe cutoff.
 */
private static final long MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(
"1970-04-10T00:00:00.000Z"
);

/**
 * Tests round tripping a nanosecond-resolution date, supplied as a
 * fixed decimal, through doc values and field fetching via the
 * {@code date_nanos} field.
 */
public void testFetchNanosFromFixedNanos() throws IOException {
    BigDecimal fixedNanos = new BigDecimal(randomDecimalNanos(MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION));
    assertFetch(dateNanosMapperService(), "field", fixedNanos, null);
}

/**
 * Tests round tripping a nanosecond-resolution date, supplied as a
 * fixed decimal, through doc values and field fetching via the
 * {@code date_nanos} field when there is a format.
 */
public void testFetchNanosFromFixedNanosFormatted() throws IOException {
    BigDecimal fixedNanos = new BigDecimal(randomDecimalNanos(MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION));
    assertFetch(dateNanosMapperService(), "field", fixedNanos, "strict_date_optional_time_nanos");
}

/** Builds a mapper service with a single default-configured {@code date_nanos} field. */
private MapperService dateNanosMapperService() throws IOException {
    return createMapperService(mapping(builder -> builder.startObject("field").field("type", "date_nanos").endObject()));
}

/**
 * Builds a random iso8601 date no later than {@code maxMillis} with six
 * extra random sub-millisecond digits appended.
 */
private String randomIs8601Nanos(long maxMillis) {
    String millisDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomLongBetween(0, maxMillis));
    String withoutZone = millisDate.substring(0, millisDate.length() - 1); // Drop the trailing "Z"
    // Append six random nano digits and restore the "Z".
    return withoutZone + String.format(Locale.ROOT, "%06d", between(0, 999999)) + "Z";
}

/**
 * Builds a random {@code millis.nanos} decimal date string no later
 * than {@code maxMillis}, e.g. {@code "1589578382123.4567"}.
 */
private String randomDecimalNanos(long maxMillis) {
    long millis = randomLongBetween(0, maxMillis);
    return millis + "." + between(0, 999999);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static java.util.stream.Collectors.toList;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -470,4 +473,26 @@ public void testSplitQueriesOnWhitespace() throws IOException {
new String[] { "hello world" }
);
}

/** Round trips a single random keyword value through doc values and source fetching. */
public void testFetch() throws IOException {
    String value = randomAlphaOfLength(5);
    assertFetch(keywordMapperService(), "field", value, null);
}

/**
 * Round trips several keyword values at once. Doc values sort and
 * dedupe multi-valued fields while source fetching does not, so we
 * feed both sides values that are already sorted and unique.
 */
public void testFetchMany() throws IOException {
    int count = between(2, 10);
    Set<String> unique = new HashSet<>();
    while (unique.size() < count) {
        unique.add(randomAlphaOfLength(5));
    }
    List<String> sortedValues = unique.stream().sorted().collect(toList());
    assertFetch(keywordMapperService(), "field", sortedValues, null);
}

/** Builds a mapper service with a single default-configured {@code keyword} field. */
private MapperService keywordMapperService() throws IOException {
    return createMapperService(mapping(builder -> builder.startObject("field").field("type", "keyword").endObject()));
}
}
Loading

0 comments on commit fa0351c

Please sign in to comment.