Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a custom parser for parsing ISO8601 datetime variants #106486

Merged
merged 21 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docs/changelog/106486.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
pr: 106486
summary: Create custom parser for ISO-8601 datetimes
area: Infra/Core
type: enhancement
issues:
- 102063
highlight:
  title: New custom parser for ISO-8601 datetimes
  body: |-
    This introduces a new custom parser for ISO-8601 datetimes, for the `iso8601`, `strict_date_optional_time`, and
    `strict_date_optional_time_nanos` built-in date formats. This provides a performance improvement over the
    default Java date-time parsing. Whilst it maintains much of the same behaviour,
    the new parser does not accept nonsensical date-time strings that have multiple fractional seconds fields
    or multiple timezone specifiers. If the new parser fails to parse a string, it will then use the previous parser
    to parse it. If a large proportion of the input data consists of these invalid strings, this may cause
    a small performance degradation. If you wish to force the use of the old parsers regardless,
    set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.common.time;

import java.util.stream.IntStream;

/**
 * A read-only view over the range {@code [startOffset, endOffset)} of another {@link CharSequence}.
 * <p>
 * The view keeps a reference to the wrapped sequence and translates indices on access, so creating it does not
 * copy the underlying characters (as {@link String#substring(int, int)} does). {@link #subSequence(int, int)} and
 * {@link #toString()} delegate to the wrapped sequence's own {@code subSequence}; whether those calls copy
 * depends on the wrapped implementation.
 */
class CharSubSequence implements CharSequence {
    private final CharSequence wrapped;
    private final int startOffset; // inclusive
    private final int endOffset; // exclusive

    /**
     * Creates a view over {@code wrapped} covering {@code [startOffset, endOffset)}.
     *
     * @param wrapped     the sequence to expose a range of
     * @param startOffset index in {@code wrapped} of the first character of the view, inclusive
     * @param endOffset   index in {@code wrapped} just past the last character of the view, exclusive
     * @throws IllegalArgumentException if {@code startOffset} is negative, {@code endOffset} is beyond the end of
     *                                  {@code wrapped}, or {@code endOffset} is before {@code startOffset}
     */
    CharSubSequence(CharSequence wrapped, int startOffset, int endOffset) {
        if (startOffset < 0) {
            throw new IllegalArgumentException("startOffset must not be negative, was " + startOffset);
        }
        if (endOffset > wrapped.length()) {
            throw new IllegalArgumentException(
                "endOffset " + endOffset + " is beyond the end of the wrapped sequence (length " + wrapped.length() + ")"
            );
        }
        if (endOffset < startOffset) {
            throw new IllegalArgumentException("endOffset " + endOffset + " is before startOffset " + startOffset);
        }

        this.wrapped = wrapped;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
    }

    /** Returns the number of characters in the view. */
    @Override
    public int length() {
        return endOffset - startOffset;
    }

    /**
     * Returns the character at {@code index}, relative to the start of the view.
     *
     * @throws IndexOutOfBoundsException if {@code index} falls outside the view
     */
    @Override
    public char charAt(int index) {
        int adjustedIndex = index + startOffset;
        // a negative index (or one that overflows when shifted) ends up below startOffset and is rejected here
        if (adjustedIndex < startOffset || adjustedIndex >= endOffset) {
            throw new IndexOutOfBoundsException(index);
        }
        return wrapped.charAt(adjustedIndex);
    }

    /** Returns {@code true} if the view covers no characters. */
    @Override
    public boolean isEmpty() {
        return startOffset == endOffset;
    }

    /**
     * Returns the range {@code [start, end)} of this view, with both indices relative to the start of the view.
     * The result is produced by the wrapped sequence's own {@code subSequence} method.
     *
     * @throws IndexOutOfBoundsException if the range is not fully inside the view, or {@code start} is after {@code end}
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        int adjustedStart = start + startOffset;
        int adjustedEnd = end + startOffset;
        if (adjustedStart < startOffset) {
            throw new IndexOutOfBoundsException(start);
        }
        if (adjustedEnd > endOffset) {
            throw new IndexOutOfBoundsException(end);
        }
        if (adjustedStart > adjustedEnd) {
            throw new IndexOutOfBoundsException("start " + start + " is after end " + end);
        }

        return wrapped.subSequence(adjustedStart, adjustedEnd);
    }

    /** Streams the characters of the view by skipping and limiting the wrapped sequence's own stream. */
    @Override
    public IntStream chars() {
        return wrapped.chars().skip(startOffset).limit(endOffset - startOffset);
    }

    /** Returns the characters of the view as a {@link String}. */
    @Override
    public String toString() {
        return wrapped.subSequence(startOffset, endOffset).toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
package org.elasticsearch.common.time;

import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Booleans;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.logging.internal.spi.LoggerFactory;

import java.time.Instant;
import java.time.LocalDate;
Expand All @@ -30,6 +33,7 @@
import java.time.temporal.TemporalQuery;
import java.time.temporal.WeekFields;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Stream;

import static java.time.temporal.ChronoField.DAY_OF_MONTH;
Expand All @@ -43,6 +47,24 @@

public class DateFormatters {

/**
* Whether to use only the java.time based parsers, read once from the JVM system property
* {@code es.datetime.java_time_parsers}.
* <p>
* The ISO8601 parser is as close as possible to the java.time based parsers, but there are some strings
* (multiple fractional seconds, or multiple timezones) that the ISO parser no longer accepts.
* If a string cannot be parsed by the ISO parser, the java.time parser is tried next.
* If there are lots of these strings, trying the ISO parser and then the java.time parser might cause a performance drop.
* So a JVM option is provided so that users can use just the java.time parsers, if they really need to.
* <p>
* NOTE(review): the second argument {@code false} is presumably the value used when the property is not set -
* confirm against {@code Booleans.parseBoolean}.
*/
@UpdateForV9 // evaluate if we need to deprecate/remove this
private static final boolean JAVA_TIME_PARSERS_ONLY = Booleans.parseBoolean(System.getProperty("es.datetime.java_time_parsers"), false);

static {
    // Announce once, at class initialisation, that the java.time-only mode has been selected.
    if (JAVA_TIME_PARSERS_ONLY) {
        // ES logging may not have been initialized yet when this class is used directly in tests,
        // in which case no provider is available and the message is simply not logged
        LoggerFactory loggerFactory = LoggerFactory.provider();
        if (loggerFactory != null) {
            loggerFactory.getLogger(DateFormatters.class).info("Using java.time datetime parsers only");
        }
    }
}

/**
 * Builds a {@link DateFormatter} that uses the same java.time {@code formatter} for both printing and parsing.
 *
 * @param format    the format name handed to the resulting formatter
 * @param formatter the java.time formatter wrapped by both the printer and the parser
 */
private static DateFormatter newDateFormatter(String format, DateTimeFormatter formatter) {
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(formatter);
    JavaTimeDateTimeParser parser = new JavaTimeDateTimeParser(formatter);
    return new JavaDateFormatter(format, printer, parser);
}
Expand Down Expand Up @@ -168,11 +190,18 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p
/**
* A generic ISO datetime formatter, named {@code strict_date_optional_time}, where the date is mandatory and the time is optional.
* It is built from {@code STRICT_DATE_OPTIONAL_TIME_PRINTER} and {@code STRICT_DATE_OPTIONAL_TIME_FORMATTER}
* (presumably used for printing and parsing respectively - the three-argument {@code newDateFormatter} overload
* is not visible here, confirm its parameter order).
*/
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME = newDateFormatter(
"strict_date_optional_time",
STRICT_DATE_OPTIONAL_TIME_PRINTER,
STRICT_DATE_OPTIONAL_TIME_FORMATTER
);
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME;
static {
    // The java.time parser is always present: on its own when JAVA_TIME_PARSERS_ONLY is set,
    // otherwise listed after the ISO-8601 parser.
    DateTimeParser fallbackParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER);
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER);

    DateTimeParser[] parsers;
    if (JAVA_TIME_PARSERS_ONLY) {
        parsers = new DateTimeParser[] { fallbackParser };
    } else {
        parsers = new DateTimeParser[] { new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT), fallbackParser };
    }

    STRICT_DATE_OPTIONAL_TIME = new JavaDateFormatter("strict_date_optional_time", printer, parsers);
}

private static final DateTimeFormatter STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS = new DateTimeFormatterBuilder().append(
STRICT_YEAR_MONTH_DAY_FORMATTER
Expand Down Expand Up @@ -224,51 +253,69 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p
/**
* A generic ISO datetime formatter, named {@code strict_date_optional_time_nanos}, where the date is mandatory
* and the time is optional, with nanosecond resolution.
* It is built from {@code STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS} and {@code STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS}
* (presumably used for printing and parsing respectively - the three-argument {@code newDateFormatter} overload
* is not visible here, confirm its parameter order).
*/
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS = newDateFormatter(
"strict_date_optional_time_nanos",
STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS,
STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS
);
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS;
static {
    // The java.time parser is always present: on its own when JAVA_TIME_PARSERS_ONLY is set,
    // otherwise listed after the ISO-8601 parser.
    DateTimeParser fallbackParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS);
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS);

    DateTimeParser[] parsers;
    if (JAVA_TIME_PARSERS_ONLY) {
        parsers = new DateTimeParser[] { fallbackParser };
    } else {
        parsers = new DateTimeParser[] {
            new Iso8601DateTimeParser(Set.of(HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), true).withLocale(Locale.ROOT),
            fallbackParser };
    }

    STRICT_DATE_OPTIONAL_TIME_NANOS = new JavaDateFormatter("strict_date_optional_time_nanos", printer, parsers);
}

/**
* An ISO 8601 compatible date time formatter and parser, named {@code iso8601}.
* This is not fully compatible with the ISO 8601 spec, which would require handling far more edge cases; it is merely
* compatible with the existing legacy joda time ISO date formatter.
* <p>
* Accepted shape, as built below: a mandatory date, optionally followed by {@code 'T'} and then by progressively
* nested optional hour, minute, second and fraction-of-second parts, and by optional zone information.
*/
private static final DateFormatter ISO_8601 = newDateFormatter(
"iso8601",
STRICT_DATE_OPTIONAL_TIME_PRINTER,
new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER)
// optional 'T' section: everything below, including the zone, is only allowed after a 'T'
.optionalStart()
.appendLiteral('T')
// optional two-digit hour
.optionalStart()
.appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE)
// optional ':' + two-digit minute, only after an hour
.optionalStart()
.appendLiteral(':')
.appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE)
// optional ':' + two-digit second, only after a minute
.optionalStart()
.appendLiteral(':')
.appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE)
// optional fraction of 1-9 digits introduced by the decimal point (decimalPoint = true)
.optionalStart()
.appendFraction(NANO_OF_SECOND, 1, 9, true)
.optionalEnd()
// optional fraction of 1-9 digits introduced by a literal ',' instead
.optionalStart()
.appendLiteral(",")
.appendFraction(NANO_OF_SECOND, 1, 9, false)
.optionalEnd()
// close the second, minute and hour sections
.optionalEnd()
.optionalEnd()
.optionalEnd()
// optional zone or offset id
.optionalStart()
.appendZoneOrOffsetId()
.optionalEnd()
// optional zone in the form accepted by TIME_ZONE_FORMATTER_NO_COLON
.optionalStart()
.append(TIME_ZONE_FORMATTER_NO_COLON)
.optionalEnd()
// close the 'T' section
.optionalEnd()
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT)
);
private static final DateFormatter ISO_8601;
static {
    // java.time definition of the accepted shape: a mandatory date, then an optional 'T' section holding
    // nested optional hour / minute / second / fraction parts and optional zone information
    DateTimeFormatter javaTimeFormatter = new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER)
        .optionalStart()
            .appendLiteral('T')
            .optionalStart()
                .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE)
                .optionalStart()
                    .appendLiteral(':')
                    .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE)
                    .optionalStart()
                        .appendLiteral(':')
                        .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE)
                        .optionalStart()
                            .appendFraction(NANO_OF_SECOND, 1, 9, true)
                        .optionalEnd()
                        .optionalStart()
                            .appendLiteral(",")
                            .appendFraction(NANO_OF_SECOND, 1, 9, false)
                        .optionalEnd()
                    .optionalEnd()
                .optionalEnd()
            .optionalEnd()
            .optionalStart()
                .appendZoneOrOffsetId()
            .optionalEnd()
            .optionalStart()
                .append(TIME_ZONE_FORMATTER_NO_COLON)
            .optionalEnd()
        .optionalEnd()
        .toFormatter(Locale.ROOT)
        .withResolverStyle(ResolverStyle.STRICT);

    // The java.time parser is always present: on its own when JAVA_TIME_PARSERS_ONLY is set,
    // otherwise listed after the ISO-8601 parser.
    DateTimeParser fallbackParser = new JavaTimeDateTimeParser(javaTimeFormatter);
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER);

    DateTimeParser[] parsers;
    if (JAVA_TIME_PARSERS_ONLY) {
        parsers = new DateTimeParser[] { fallbackParser };
    } else {
        parsers = new DateTimeParser[] { new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT), fallbackParser };
    }

    ISO_8601 = new JavaDateFormatter("iso8601", printer, parsers);
}

/////////////////////////////////////////
//
Expand Down
Loading