Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch up fetching dates from source #70040

Merged
merged 10 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ public static ZoneId of(String zoneId) {
return ZoneId.of(zoneId).normalized();
}

static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
/**
* The maximum nanosecond resolution date we can properly handle.
*/
public static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ public static XContentParser createParser(NamedXContentRegistry xContentRegistry

/**
* Converts the given bytes into a map that is optionally ordered.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
* @deprecated this method relies on auto-detection of content type. Use {@link #convertToMap(BytesReference, boolean, XContentType)}
* instead with the proper {@link XContentType}
*/
Expand All @@ -88,6 +94,12 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen

/**
* Converts the given bytes into a map that is optionally ordered. The provided {@link XContentType} must be non-null.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
*/
public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType)
throws ElasticsearchParseException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.text.NumberFormat;
import java.time.DateTimeException;
import java.time.Instant;
import java.time.ZoneId;
Expand Down Expand Up @@ -345,6 +346,23 @@ public long parse(String value) {
return resolution.convert(DateFormatters.from(dateTimeFormatter().parse(value), dateTimeFormatter().locale()).toInstant());
}

/**
 * Format to use to resolve {@link Number}s from the source. It's valid
 * to send the numbers with up to six digits after the decimal place
 * and we'll parse them as {@code millis.nanos}. The source
 * deserialization code isn't particularly careful here and can return
 * a {@code double} instead of the exact string in the {@code _source}.
 * So we have to *get* that string.
 * <p>
 * Nik chose not to use {@link String#format} for this because it feels
 * a little wasteful. It'd probably be fine but this makes Nik feel a
 * little less bad about the {@code instanceof} and the string allocation.
 * <p>
 * NOTE(review): {@link NumberFormat} instances are not thread-safe; if
 * {@code valueFetcher} can run concurrently on multiple threads this
 * shared instance needs guarding — TODO confirm the calling context.
 */
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(Locale.ROOT);
static {
// No grouping separators ("1234567", not "1,234,567") and at most six
// fraction digits so the formatted value parses as millis.nanos.
NUMBER_FORMAT.setGroupingUsed(false);
NUMBER_FORMAT.setMaximumFractionDigits(6);
}
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
DateFormatter defaultFormatter = dateTimeFormatter();
Expand All @@ -355,7 +373,8 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
return new SourceValueFetcher(name(), context, nullValue) {
@Override
public String parseSourceValue(Object value) {
String date = value.toString();
String date = value instanceof Number ? NUMBER_FORMAT.format(value) : value.toString();
// TODO can we emit a warning if we're losing precision here? I'm not sure we can.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will be used for each document in the request, so just calling HeaderWarning.addWarning(formattedMessage); would probably spam the response headers.
I am not sure we have a convenient way of doing this. The code could look like this...

 static Logger l = LogManager.getLogger("dateFieldMapperHeader");

    static {
        final LoggerContext context = (LoggerContext) LogManager.getContext(false);
        final Configuration configuration = context.getConfiguration();


        RateLimitingFilter rateLimitingFilter = new RateLimitingFilter();
        HeaderWarningAppender dateFieldMapperHeaderAppender = new HeaderWarningAppender("dateFieldMapperHeaderAppender", rateLimitingFilter);
        Loggers.addAppender(LogManager.getLogger("dateFieldMapperHeader"), dateFieldMapperHeaderAppender);


    }

and then a usage

                public String parseSourceValue(Object value) {
                    l.info(new ESLogMessage("someMessage").with(DeprecatedMessage.KEY_FIELD_NAME, someCleverSearchRequestUUID));

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah! I was more thinking about how to detect that we're losing precision. I think it's probably safest to leave it for a follow up change.

long timestamp = parse(date);
ZonedDateTime dateTime = resolution().toInstant(timestamp).atZone(ZoneOffset.UTC);
return formatter.format(dateTime);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ public int docId() {

/**
* Return the source as a map that will be unchanged when the lookup
* moves to a different document
* moves to a different document.
* <p>
* Important: This can lose precision on numbers with a decimal point. It
* converts numbers like {@code "n": 1234.567} to a {@code double} which
* only has 52 bits of precision in the mantissa. This will come up most
* frequently when folks write nanosecond precision dates as a decimal
* number.
*/
public Map<String, Object> source() {
if (source != null) {
Expand Down Expand Up @@ -84,6 +90,15 @@ private static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(Bytes
return XContentHelper.convertToMap(source, false);
}

/**
 * Convert the raw {@code _source} bytes into a {@link Map} of plain java
 * objects.
 * <p>
 * Important: this conversion can lose precision on numbers that carry a
 * decimal point. A value such as {@code "n": 1234.567} comes back as a
 * {@code double}, whose mantissa holds only 52 bits. That bites most
 * often when folks encode nanosecond precision dates as decimal numbers.
 */
public static Map<String, Object> sourceAsMap(BytesReference source) throws ElasticsearchParseException {
    Tuple<XContentType, Map<String, Object>> typeAndMap = sourceAsMapAndType(source);
    return typeAndMap.v2();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.math.BigDecimal;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.List;
import java.util.Locale;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -356,4 +358,148 @@ public void testResolutionRounding() {
assertThat(up, equalTo(0L));
}
}

/**
 * The max date, in milliseconds since epoch, that the iso8601 format can
 * parse. It'll happily <em>format</em> much larger dates; it just can't
 * parse them back.
 */
private static final long MAX_ISO_DATE = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("9999-12-12T23:59:59.999Z");

public void testFetchMillis() throws IOException {
    // No explicit format: fetching falls back to the field's own formatter.
    long millis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", millis, null);
}

public void testFetchMillisFromMillisFormatted() throws IOException {
    long millis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", millis, "epoch_millis");
}

public void testFetchMillisFromMillisFormattedIso8601() throws IOException {
    // Index an epoch-millis number but fetch it rendered as iso8601.
    long millis = randomLongBetween(0, Long.MAX_VALUE);
    assertFetch(dateMapperService(), "field", millis, "iso8601");
}

public void testFetchMillisFromIso8601() throws IOException {
    // Index an iso8601 string; stay inside the range iso8601 can parse.
    String date = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomLongBetween(0, MAX_ISO_DATE));
    assertFetch(dateMapperService(), "field", date, "iso8601");
}

public void testFetchMillisFromIso8601Nanos() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", nanoDate, null);
}

public void testFetchMillisFromIso8601NanosFormatted() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_ISO_DATE);
    assertFetch(dateMapperService(), "field", nanoDate, "strict_date_optional_time_nanos");
}

/**
 * Tests round tripping a date with nanosecond resolution through doc
 * values and field fetching via the {@code date} field. We expect this to
 * lose precision because the {@code date} field only supports millisecond
 * resolution. But it's important that both paths lose precision in the
 * same way.
 */
public void testFetchMillisFromRoundedNanos() throws IOException {
assertFetch(dateMapperService(), "field", randomDecimalNanos(MAX_ISO_DATE), null);
}

/**
 * Tests round tripping a date with nanosecond resolution through doc
 * values and field fetching via the {@code date} field with a specific
 * format. We expect this to lose precision because the {@code date}
 * field only supports millisecond resolution. But it's important that
 * both paths lose precision in the same way.
 */
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/70085") // Fails about 1/1000 of the time because of rounding.
public void testFetchMillisFromFixedNanos() throws IOException {
assertFetch(dateMapperService(), "field", new BigDecimal(randomDecimalNanos(MAX_ISO_DATE)), null);
}

/**
 * Builds a {@link MapperService} with a single millisecond resolution
 * {@code date} field named {@code field}.
 */
private MapperService dateMapperService() throws IOException {
    return createMapperService(mapping(b -> {
        b.startObject("field");
        b.field("type", "date");
        b.endObject();
    }));
}

/**
 * The maximum valid nanosecond-resolution date, expressed in milliseconds
 * since epoch, taken from {@link DateUtils#MAX_NANOSECOND_INSTANT}.
 */
private static final long MAX_NANOS = DateUtils.MAX_NANOSECOND_INSTANT.toEpochMilli();

public void testFetchNanos() throws IOException {
    // No explicit format: fetching falls back to the field's own formatter.
    long millis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", millis, null);
}

public void testFetchNanosFromMillisFormatted() throws IOException {
    long millis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", millis, "epoch_millis");
}

public void testFetchNanosFromMillisFormattedIso8601() throws IOException {
    // Index an epoch-millis number but fetch it rendered as iso8601.
    long millis = randomLongBetween(0, MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", millis, "iso8601");
}

public void testFetchNanosFromIso8601Nanos() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", nanoDate, null);
}

public void testFetchNanosFromIso8601NanosFormatted() throws IOException {
    String nanoDate = randomIs8601Nanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", nanoDate, "strict_date_optional_time_nanos");
}

public void testFetchNanosFromRoundedNanos() throws IOException {
    String decimalDate = randomDecimalNanos(MAX_NANOS);
    assertFetch(dateNanosMapperService(), "field", decimalDate, null);
}

/**
 * Maximum date we can round trip through {@code date_nanos} without
 * losing precision right now. We hope to be able to make this
 * {@link #MAX_NANOS} soon.
 * <p>
 * Given the maximum precise value for a double (9,007,199,254,740,992,
 * i.e. 2^53) I'd expect this to be 1970-04-15T05:59:59.253Z but that
 * causes errors. I'm curious about why but not curious enough to track
 * it down.
 */
private static final long MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(
"1970-04-10T00:00:00.000Z"
);

/**
 * Round trips a nanosecond resolution date through doc values and field
 * fetching on a {@code date_nanos} field without any explicit format.
 */
public void testFetchNanosFromFixedNanos() throws IOException {
    String decimalDate = randomDecimalNanos(MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION);
    assertFetch(dateNanosMapperService(), "field", new BigDecimal(decimalDate), null);
}

/**
 * Round trips a nanosecond resolution date through doc values and field
 * fetching on a {@code date_nanos} field when a format is supplied.
 */
public void testFetchNanosFromFixedNanosFormatted() throws IOException {
    String decimalDate = randomDecimalNanos(MAX_MILLIS_DOUBLE_NANOS_KEEPS_PRECISION);
    assertFetch(dateNanosMapperService(), "field", new BigDecimal(decimalDate), "strict_date_optional_time_nanos");
}

/**
 * Builds a {@link MapperService} with a single nanosecond resolution
 * {@code date_nanos} field named {@code field}.
 */
private MapperService dateNanosMapperService() throws IOException {
    return createMapperService(mapping(b -> {
        b.startObject("field");
        b.field("type", "date_nanos");
        b.endObject();
    }));
}

/**
 * A random iso8601 date with nanosecond precision, built by appending six
 * random sub-millisecond digits to a random millisecond precision date
 * no later than {@code maxMillis}.
 */
private String randomIs8601Nanos(long maxMillis) {
    String millisDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomLongBetween(0, maxMillis));
    String withoutZone = millisDate.substring(0, millisDate.length() - 1); // drop the trailing "Z"
    String subMillis = String.format(Locale.ROOT, "%06d", between(0, 999999));
    return withoutZone + subMillis + "Z";
}

/**
 * A random date written as a decimal number: whole milliseconds before
 * the point, up to six random fraction digits after it.
 */
private String randomDecimalNanos(long maxMillis) {
    long millis = randomLongBetween(0, maxMillis);
    int fraction = between(0, 999999);
    return millis + "." + fraction;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static java.util.stream.Collectors.toList;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -458,4 +461,26 @@ public void testSplitQueriesOnWhitespace() throws IOException {
new String[] { "hello world" }
);
}

public void testFetch() throws IOException {
    String value = randomAlphaOfLength(5);
    assertFetch(keywordMapperService(), "field", value, null);
}

public void testFetchMany() throws IOException {
    /*
     * Doc values sort and deduplicate multi-valued fields while source
     * fetching does not, so feed both paths values that are already
     * sorted and unique.
     */
    int count = between(2, 10);
    Set<String> unique = new HashSet<>();
    while (unique.size() < count) {
        unique.add(randomAlphaOfLength(5));
    }
    List<String> sortedValues = unique.stream().sorted().collect(toList());
    assertFetch(keywordMapperService(), "field", sortedValues, null);
}

/**
 * Builds a {@link MapperService} with a single {@code keyword} field
 * named {@code field}.
 */
private MapperService keywordMapperService() throws IOException {
    return createMapperService(mapping(b -> {
        b.startObject("field");
        b.field("type", "keyword");
        b.endObject();
    }));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,24 @@
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Supplier;

import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
* Base class for testing {@link Mapper}s.
Expand Down Expand Up @@ -452,4 +456,29 @@ public final void testTextSearchInfoConsistency() throws IOException {
protected void assertSearchable(MappedFieldType fieldType) {
assertEquals(fieldType.isSearchable(), fieldType.getTextSearchInfo() != TextSearchInfo.NONE);
}

/**
 * Assert that fetching a value using {@link MappedFieldType#valueFetcher}
 * produces the same value as fetching using doc values.
 *
 * @param mapperService mapper service whose mapping declares {@code field}
 * @param field name of the field to index and fetch
 * @param value the value to index into {@code field}
 * @param format fetch-time format, or {@code null} for the field's default
 * @throws IOException if indexing or reading the Lucene index fails
 */
protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException {
MappedFieldType ft = mapperService.fieldType(field);
SourceToParse source = source(b -> b.field(ft.name(), value));
// Fetch path 1: through doc values, rendered with the field's doc value format.
ValueFetcher docValueFetcher = new DocValueFetcher(
ft.docValueFormat(format, null),
ft.fielddataBuilder("test", () -> null).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService())
);
// Fetch path 2: the field's own fetcher reading from _source. Mock the
// context so the source path resolves to exactly this one field.
SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class);
when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field));
ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format);
ParsedDocument doc = mapperService.documentMapper().parse(source);
withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> {
// Single document indexed, so it lives in the first (only) leaf at doc 0.
SourceLookup sourceLookup = new SourceLookup();
sourceLookup.setSegmentAndDocument(ir.leaves().get(0), 0);
docValueFetcher.setNextReader(ir.leaves().get(0));
List<?> fromDocValues = docValueFetcher.fetchValues(sourceLookup);
List<?> fromNative = nativeFetcher.fetchValues(sourceLookup);
// Both fetch paths must agree, including any precision loss.
assertEquals(fromDocValues, fromNative);
});
}
}