From e01600d5a53b8e41e7a084b617fea8ae82560d06 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 14 May 2024 11:02:42 +0100 Subject: [PATCH] Create a custom parser for parsing ISO8601 datetime variants (#106486) This adds a hand-written parser for parsing fixed ISO8601 datetime strings, for the `iso8601`, `strict_date_optional_time`, and `strict_date_optional_time_nanos` date formats. If the new parser fails to parse a string, the existing parsers are then tried, so existing behaviour is maintained. There is a new JVM option added that can force use of the existing parsers, if that is needed for any reason. --- docs/changelog/106486.yaml | 17 + .../common/time/CharSubSequence.java | 68 +++ .../common/time/DateFormatters.java | 135 +++-- .../elasticsearch/common/time/DateTime.java | 150 +++++ .../common/time/Iso8601DateTimeParser.java | 78 +++ .../common/time/Iso8601Parser.java | 521 ++++++++++++++++++ .../common/time/JavaDateFormatter.java | 19 + .../common/time/Iso8601ParserTests.java | 427 ++++++++++++++ 8 files changed, 1371 insertions(+), 44 deletions(-) create mode 100644 docs/changelog/106486.yaml create mode 100644 server/src/main/java/org/elasticsearch/common/time/CharSubSequence.java create mode 100644 server/src/main/java/org/elasticsearch/common/time/DateTime.java create mode 100644 server/src/main/java/org/elasticsearch/common/time/Iso8601DateTimeParser.java create mode 100644 server/src/main/java/org/elasticsearch/common/time/Iso8601Parser.java create mode 100644 server/src/test/java/org/elasticsearch/common/time/Iso8601ParserTests.java diff --git a/docs/changelog/106486.yaml b/docs/changelog/106486.yaml new file mode 100644 index 0000000000000..b33df50780e02 --- /dev/null +++ b/docs/changelog/106486.yaml @@ -0,0 +1,17 @@ +pr: 106486 +summary: Create custom parser for ISO-8601 datetimes +area: Infra/Core +type: enhancement +issues: + - 102063 +highlight: + title: New custom parser for ISO-8601 datetimes + body: |- + This introduces a new custom parser for ISO-8601 datetimes, for the `iso8601`, `strict_date_optional_time`, and + `strict_date_optional_time_nanos` built-in date formats. This provides a performance improvement over the + default Java date-time parsing. Whilst it maintains much of the same behaviour, + the new parser does not accept nonsensical date-time strings that have multiple fractional seconds fields + or multiple timezone specifiers. If the new parser fails to parse a string, it will then use the previous parser + to parse it. If a large proportion of the input data consists of these invalid strings, this may cause + a small performance degradation. If you wish to force the use of the old parsers regardless, + set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes. diff --git a/server/src/main/java/org/elasticsearch/common/time/CharSubSequence.java b/server/src/main/java/org/elasticsearch/common/time/CharSubSequence.java new file mode 100644 index 0000000000000..39dbb83bdf5a4 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/time/CharSubSequence.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.time; + +import java.util.stream.IntStream; + +/** + * A CharSequence that provides a subsequence of another CharSequence without allocating a new backing array (as String does) + */ +class CharSubSequence implements CharSequence { + private final CharSequence wrapped; + private final int startOffset; // inclusive + private final int endOffset; // exclusive + + CharSubSequence(CharSequence wrapped, int startOffset, int endOffset) { + if (startOffset < 0) throw new IllegalArgumentException(); + if (endOffset > wrapped.length()) throw new IllegalArgumentException(); + if (endOffset < startOffset) throw new IllegalArgumentException(); + + this.wrapped = wrapped; + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + @Override + public int length() { + return endOffset - startOffset; + } + + @Override + public char charAt(int index) { + int adjustedIndex = index + startOffset; + if (adjustedIndex < startOffset || adjustedIndex >= endOffset) throw new IndexOutOfBoundsException(index); + return wrapped.charAt(adjustedIndex); + } + + @Override + public boolean isEmpty() { + return startOffset == endOffset; + } + + @Override + public CharSequence subSequence(int start, int end) { + int adjustedStart = start + startOffset; + int adjustedEnd = end + startOffset; + if (adjustedStart < startOffset) throw new IndexOutOfBoundsException(start); + if (adjustedEnd > endOffset) throw new IndexOutOfBoundsException(end); + if (adjustedStart > adjustedEnd) throw new IndexOutOfBoundsException(); + + return wrapped.subSequence(adjustedStart, adjustedEnd); + } + + @Override + public IntStream chars() { + return wrapped.chars().skip(startOffset).limit(endOffset - startOffset); + } + + @Override + public String toString() { + return wrapped.subSequence(startOffset, endOffset).toString(); + } +} diff --git a/server/src/main/java/org/elasticsearch/common/time/DateFormatters.java b/server/src/main/java/org/elasticsearch/common/time/DateFormatters.java index 7dae11fb8d720..1133eac3f8f7b 100644 --- a/server/src/main/java/org/elasticsearch/common/time/DateFormatters.java +++ b/server/src/main/java/org/elasticsearch/common/time/DateFormatters.java @@ -9,7 +9,10 @@ package org.elasticsearch.common.time; import org.elasticsearch.common.Strings; +import org.elasticsearch.core.Booleans; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.logging.internal.spi.LoggerFactory; import java.time.Instant; import java.time.LocalDate; @@ -30,6 +33,7 @@ import java.time.temporal.TemporalQuery; import java.time.temporal.WeekFields; import java.util.Locale; +import java.util.Set; import java.util.stream.Stream; import static java.time.temporal.ChronoField.DAY_OF_MONTH; @@ -43,6 +47,24 @@ public class DateFormatters { + /** + * The ISO8601 parser is as close as possible to the java.time based parsers, but there are some strings + * that are no longer accepted (multiple fractional seconds, or multiple timezones) by the ISO parser. + * If a string cannot be parsed by the ISO parser, it then tries the java.time one. + * If there's lots of these strings, trying the ISO parser, then the java.time parser, might cause a performance drop. + * So provide a JVM option so that users can just use the java.time parsers, if they really need to. + */ + @UpdateForV9 // evaluate if we need to deprecate/remove this + private static final boolean JAVA_TIME_PARSERS_ONLY = Booleans.parseBoolean(System.getProperty("es.datetime.java_time_parsers"), false); + + static { + // when this is used directly in tests ES logging may not have been initialized yet + LoggerFactory logger; + if (JAVA_TIME_PARSERS_ONLY && (logger = LoggerFactory.provider()) != null) { + logger.getLogger(DateFormatters.class).info("Using java.time datetime parsers only"); + } + } + private static DateFormatter newDateFormatter(String format, DateTimeFormatter formatter) { return new JavaDateFormatter(format, new JavaTimeDateTimePrinter(formatter), new JavaTimeDateTimeParser(formatter)); } @@ -168,11 +190,18 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p /** * Returns a generic ISO datetime parser where the date is mandatory and the time is optional. */ - private static final DateFormatter STRICT_DATE_OPTIONAL_TIME = newDateFormatter( - "strict_date_optional_time", - STRICT_DATE_OPTIONAL_TIME_PRINTER, - STRICT_DATE_OPTIONAL_TIME_FORMATTER - ); + private static final DateFormatter STRICT_DATE_OPTIONAL_TIME; + static { + DateTimeParser javaTimeParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER); + + STRICT_DATE_OPTIONAL_TIME = new JavaDateFormatter( + "strict_date_optional_time", + new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER), + JAVA_TIME_PARSERS_ONLY + ? new DateTimeParser[] { javaTimeParser } + : new DateTimeParser[] { new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT), javaTimeParser } + ); + } private static final DateTimeFormatter STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS = new DateTimeFormatterBuilder().append( STRICT_YEAR_MONTH_DAY_FORMATTER @@ -224,51 +253,69 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p /** * Returns a generic ISO datetime parser where the date is mandatory and the time is optional with nanosecond resolution. */ - private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS = newDateFormatter( - "strict_date_optional_time_nanos", - STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS, - STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS - ); + private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS; + static { + DateTimeParser javaTimeParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS); + + STRICT_DATE_OPTIONAL_TIME_NANOS = new JavaDateFormatter( + "strict_date_optional_time_nanos", + new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS), + JAVA_TIME_PARSERS_ONLY + ? new DateTimeParser[] { javaTimeParser } + : new DateTimeParser[] { + new Iso8601DateTimeParser(Set.of(HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), true).withLocale(Locale.ROOT), + javaTimeParser } + ); + } /** * Returns a ISO 8601 compatible date time formatter and parser. * This is not fully compatible to the existing spec, which would require far more edge cases, but merely compatible with the * existing legacy joda time ISO date formatter */ - private static final DateFormatter ISO_8601 = newDateFormatter( - "iso8601", - STRICT_DATE_OPTIONAL_TIME_PRINTER, - new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER) - .optionalStart() - .appendLiteral('T') - .optionalStart() - .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE) - .optionalStart() - .appendLiteral(':') - .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE) - .optionalStart() - .appendLiteral(':') - .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE) - .optionalStart() - .appendFraction(NANO_OF_SECOND, 1, 9, true) - .optionalEnd() - .optionalStart() - .appendLiteral(",") - .appendFraction(NANO_OF_SECOND, 1, 9, false) - .optionalEnd() - .optionalEnd() - .optionalEnd() - .optionalEnd() - .optionalStart() - .appendZoneOrOffsetId() - .optionalEnd() - .optionalStart() - .append(TIME_ZONE_FORMATTER_NO_COLON) - .optionalEnd() - .optionalEnd() - .toFormatter(Locale.ROOT) - .withResolverStyle(ResolverStyle.STRICT) - ); + private static final DateFormatter ISO_8601; + static { + DateTimeParser javaTimeParser = new JavaTimeDateTimeParser( + new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER) + .optionalStart() + .appendLiteral('T') + .optionalStart() + .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE) + .optionalStart() + .appendLiteral(':') + .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE) + .optionalStart() + .appendLiteral(':') + .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE) + .optionalStart() + .appendFraction(NANO_OF_SECOND, 1, 9, true) + .optionalEnd() + .optionalStart() + .appendLiteral(",") + .appendFraction(NANO_OF_SECOND, 1, 9, false) + .optionalEnd() + .optionalEnd() + .optionalEnd() + .optionalEnd() + .optionalStart() + .appendZoneOrOffsetId() + .optionalEnd() + .optionalStart() + .append(TIME_ZONE_FORMATTER_NO_COLON) + .optionalEnd() + .optionalEnd() + .toFormatter(Locale.ROOT) + .withResolverStyle(ResolverStyle.STRICT) + ); + + ISO_8601 = new JavaDateFormatter( + "iso8601", + new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER), + JAVA_TIME_PARSERS_ONLY + ? new DateTimeParser[] { javaTimeParser } + : new DateTimeParser[] { new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT), javaTimeParser } + ); + } ///////////////////////////////////////// // diff --git a/server/src/main/java/org/elasticsearch/common/time/DateTime.java b/server/src/main/java/org/elasticsearch/common/time/DateTime.java new file mode 100644 index 0000000000000..101389b43d9fc --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/time/DateTime.java @@ -0,0 +1,150 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.time; + +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.temporal.ChronoField; +import java.time.temporal.TemporalAccessor; +import java.time.temporal.TemporalField; +import java.time.temporal.TemporalQueries; +import java.time.temporal.TemporalQuery; +import java.time.temporal.UnsupportedTemporalTypeException; + +/** + * Provides information on a parsed datetime + */ +record DateTime( + int years, + Integer months, + Integer days, + Integer hours, + Integer minutes, + Integer seconds, + Integer nanos, + ZoneId zoneId, + ZoneOffset offset +) implements TemporalAccessor { + + @Override + @SuppressWarnings("unchecked") + public R query(TemporalQuery query) { + // shortcut a few queries used by DateFormatters.from + if (query == TemporalQueries.zoneId()) { + return (R) zoneId; + } + if (query == TemporalQueries.offset()) { + return (R) offset; + } + if (query == DateFormatters.LOCAL_DATE_QUERY || query == TemporalQueries.localDate()) { + if (months != null && days != null) { + return (R) LocalDate.of(years, months, days); + } + return null; + } + if (query == TemporalQueries.localTime()) { + if (hours != null && minutes != null && seconds != null) { + return (R) LocalTime.of(hours, minutes, seconds, nanos != null ? nanos : 0); + } + return null; + } + return TemporalAccessor.super.query(query); + } + + @Override + public boolean isSupported(TemporalField field) { + if (field instanceof ChronoField f) { + return switch (f) { + case YEAR -> true; + case MONTH_OF_YEAR -> months != null; + case DAY_OF_MONTH -> days != null; + case HOUR_OF_DAY -> hours != null; + case MINUTE_OF_HOUR -> minutes != null; + case SECOND_OF_MINUTE -> seconds != null; + case INSTANT_SECONDS -> months != null && days != null && hours != null && minutes != null && seconds != null; + // if the time components are there, we just default nanos to 0 if it's not present + case SECOND_OF_DAY, NANO_OF_SECOND, NANO_OF_DAY -> hours != null && minutes != null && seconds != null; + case OFFSET_SECONDS -> offset != null; + default -> false; + }; + } + + return field.isSupportedBy(this); + } + + @Override + public long getLong(TemporalField field) { + if (field instanceof ChronoField f) { + switch (f) { + case YEAR -> { + return years; + } + case MONTH_OF_YEAR -> { + return extractValue(f, months); + } + case DAY_OF_MONTH -> { + return extractValue(f, days); + } + case HOUR_OF_DAY -> { + return extractValue(f, hours); + } + case MINUTE_OF_HOUR -> { + return extractValue(f, minutes); + } + case SECOND_OF_MINUTE -> { + return extractValue(f, seconds); + } + case INSTANT_SECONDS -> { + if (isSupported(ChronoField.INSTANT_SECONDS) == false) { + throw new UnsupportedTemporalTypeException("No " + f + " value available"); + } + return LocalDateTime.of(years, months, days, hours, minutes, seconds) + .toEpochSecond(offset != null ? offset : ZoneOffset.UTC); + } + case SECOND_OF_DAY -> { + if (isSupported(ChronoField.SECOND_OF_DAY) == false) { + throw new UnsupportedTemporalTypeException("No " + f + " value available"); + } + return LocalTime.of(hours, minutes, seconds).toSecondOfDay(); + } + case NANO_OF_SECOND -> { + if (isSupported(ChronoField.NANO_OF_SECOND) == false) { + throw new UnsupportedTemporalTypeException("No " + f + " value available"); + } + return nanos != null ? nanos.longValue() : 0L; + } + case NANO_OF_DAY -> { + if (isSupported(ChronoField.NANO_OF_DAY) == false) { + throw new UnsupportedTemporalTypeException("No " + f + " value available"); + } + return LocalTime.of(hours, minutes, seconds, nanos != null ? nanos : 0).toNanoOfDay(); + } + case OFFSET_SECONDS -> { + if (offset == null) { + throw new UnsupportedTemporalTypeException("No " + f + " value available"); + } + return offset.getTotalSeconds(); + } + default -> throw new UnsupportedTemporalTypeException("No " + f + " value available"); + } + } + + return field.getFrom(this); + } + + private static long extractValue(ChronoField field, Number value) { + if (value == null) { + throw new UnsupportedTemporalTypeException("No " + field + " value available"); + } + return value.longValue(); + } +} diff --git a/server/src/main/java/org/elasticsearch/common/time/Iso8601DateTimeParser.java b/server/src/main/java/org/elasticsearch/common/time/Iso8601DateTimeParser.java new file mode 100644 index 0000000000000..2a526a36408ce --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/time/Iso8601DateTimeParser.java @@ -0,0 +1,78 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.time; + +import java.time.ZoneId; +import java.time.format.DateTimeParseException; +import java.time.temporal.ChronoField; +import java.time.temporal.TemporalAccessor; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +class Iso8601DateTimeParser implements DateTimeParser { + + private final Iso8601Parser parser; + private final ZoneId timezone; + // the locale doesn't actually matter, as we're parsing in a standardised format + // and we already account for . or , in decimals + private final Locale locale; + + Iso8601DateTimeParser(Set mandatoryFields, boolean optionalTime) { + parser = new Iso8601Parser(mandatoryFields, optionalTime, Map.of()); + timezone = null; + locale = null; + } + + private Iso8601DateTimeParser(Iso8601Parser parser, ZoneId timezone, Locale locale) { + this.parser = parser; + this.timezone = timezone; + this.locale = locale; + } + + @Override + public ZoneId getZone() { + return timezone; + } + + @Override + public Locale getLocale() { + return locale; + } + + @Override + public DateTimeParser withZone(ZoneId zone) { + return new Iso8601DateTimeParser(parser, zone, locale); + } + + @Override + public DateTimeParser withLocale(Locale locale) { + return new Iso8601DateTimeParser(parser, timezone, locale); + } + + Iso8601DateTimeParser withDefaults(Map defaults) { + return new Iso8601DateTimeParser(new Iso8601Parser(parser.mandatoryFields(), parser.optionalTime(), defaults), timezone, locale); + } + + @Override + public TemporalAccessor parse(CharSequence str) { + var result = parser.tryParse(str, timezone); + var temporal = result.result(); + if (temporal == null) { + throw new DateTimeParseException("Could not fully parse datetime", str, result.errorIndex()); + } + return temporal; + } + + @Override + public Optional tryParse(CharSequence str) { + return Optional.ofNullable(parser.tryParse(str, timezone).result()); + } +} diff --git a/server/src/main/java/org/elasticsearch/common/time/Iso8601Parser.java b/server/src/main/java/org/elasticsearch/common/time/Iso8601Parser.java new file mode 100644 index 0000000000000..4f1d131dd8ced --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/time/Iso8601Parser.java @@ -0,0 +1,521 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.time; + +import org.elasticsearch.core.Nullable; + +import java.time.DateTimeException; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.temporal.ChronoField; +import java.util.EnumMap; +import java.util.EnumSet; +import java.util.Map; +import java.util.Set; + +/** + * Parses datetimes in ISO8601 format (and subsequences thereof). + *

+ * This is faster than the generic parsing in {@link java.time.format.DateTimeFormatter}, as this is hard-coded and specific to ISO-8601. + * Various public libraries provide their own variant of this mechanism. We use our own for a few reasons: + *

    + *
  • + * We are historically a bit more lenient with strings that are invalid according to the strict specification + * (eg using a zone region instead of offset for timezone) + *
  • + *
  • Various built-in formats specify some fields as mandatory and some as optional
  • + *
  • Callers can specify defaults for fields that are not present (eg for roundup parsers)
  • + *
+ * We also do not use exceptions here, instead returning {@code null} for any invalid values, that are then + * checked and propagated as appropriate. + */ +class Iso8601Parser { + + /** + * The result of the parse. If successful, {@code result} will be non-null. + * If parse failed, {@code errorIndex} specifies the index into the parsed string + * that the first invalid data was encountered. + */ + record Result(@Nullable DateTime result, int errorIndex) { + Result(DateTime result) { + this(result, -1); + } + + static Result error(int errorIndex) { + return new Result(null, errorIndex); + } + } + + private static final Set VALID_MANDATORY_FIELDS = EnumSet.of( + ChronoField.YEAR, + ChronoField.MONTH_OF_YEAR, + ChronoField.DAY_OF_MONTH, + ChronoField.HOUR_OF_DAY, + ChronoField.MINUTE_OF_HOUR, + ChronoField.SECOND_OF_MINUTE + ); + + private static final Set VALID_DEFAULT_FIELDS = EnumSet.of( + ChronoField.MONTH_OF_YEAR, + ChronoField.DAY_OF_MONTH, + ChronoField.HOUR_OF_DAY, + ChronoField.MINUTE_OF_HOUR, + ChronoField.SECOND_OF_MINUTE, + ChronoField.NANO_OF_SECOND + ); + + private final Set mandatoryFields; + private final boolean optionalTime; + private final Map defaults; + + /** + * Constructs a new {@code Iso8601Parser} object + * + * @param mandatoryFields + * The set of fields that must be present for a valid parse. These should be specified in field order + * (eg if {@link ChronoField#DAY_OF_MONTH} is specified, {@link ChronoField#MONTH_OF_YEAR} should also be specified). + * {@link ChronoField#YEAR} is always mandatory. + * @param optionalTime + * {@code false} if the presence of time fields follows {@code mandatoryFields}, + * {@code true} if a time component is always optional, despite the presence of time fields in {@code mandatoryFields}. + * This makes it possible to specify 'time is optional, but if it is present, it must have these fields' + * by settings {@code optionalTime = true} and putting time fields such as {@link ChronoField#HOUR_OF_DAY} + * and {@link ChronoField#MINUTE_OF_HOUR} in {@code mandatoryFields}. + * @param defaults + * Map of default field values, if they are not present in the parsed string. + */ + Iso8601Parser(Set mandatoryFields, boolean optionalTime, Map defaults) { + checkChronoFields(mandatoryFields, VALID_MANDATORY_FIELDS); + checkChronoFields(defaults.keySet(), VALID_DEFAULT_FIELDS); + + this.mandatoryFields = EnumSet.of(ChronoField.YEAR); // year is always mandatory + this.mandatoryFields.addAll(mandatoryFields); + this.optionalTime = optionalTime; + this.defaults = defaults.isEmpty() ? Map.of() : new EnumMap<>(defaults); + } + + private static void checkChronoFields(Set fields, Set validFields) { + if (fields.isEmpty()) return; // nothing to check + + fields = EnumSet.copyOf(fields); + fields.removeAll(validFields); + if (fields.isEmpty() == false) { + throw new IllegalArgumentException("Invalid chrono fields specified " + fields); + } + } + + boolean optionalTime() { + return optionalTime; + } + + Set mandatoryFields() { + return mandatoryFields; + } + + private boolean isOptional(ChronoField field) { + return mandatoryFields.contains(field) == false; + } + + private Integer defaultZero(ChronoField field) { + return defaults.getOrDefault(field, 0); + } + + /** + * Attempts to parse {@code str} as an ISO-8601 datetime, returning a {@link Result} indicating if the parse + * was successful or not, and what fields were present. + * @param str The string to parse + * @param defaultTimezone The default timezone to return, if no timezone is present in the string + * @return The {@link Result} of the parse. + */ + Result tryParse(CharSequence str, @Nullable ZoneId defaultTimezone) { + if (str.charAt(0) == '-') { + // the year is negative. This is most unusual. + // Instead of always adding offsets and dynamically calculating position in the main parser code below, + // just in case it starts with a -, just parse the substring, then adjust the output appropriately + Result result = parse(new CharSubSequence(str, 1, str.length()), defaultTimezone); + + if (result.errorIndex() >= 0) { + return Result.error(result.errorIndex() + 1); + } else { + DateTime dt = result.result(); + return new Result( + new DateTime( + -dt.years(), + dt.months(), + dt.days(), + dt.hours(), + dt.minutes(), + dt.seconds(), + dt.nanos(), + dt.zoneId(), + dt.offset() + ) + ); + } + } else { + return parse(str, defaultTimezone); + } + } + + /** + * Index {@code i} is the multiplicand to get the number of nanos from the fractional second with {@code i=9-d} digits. + */ + private static final int[] NANO_MULTIPLICANDS = new int[] { 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000 }; + + /** + * Parses {@code str} in ISO8601 format. + *

+ * This parses the string using fixed offsets (it does not support variable-width fields) and separators, + * sequentially parsing each field and looking for the correct separator. + * This enables it to be very fast, as all the fields are in fixed places in the string. + * The only variable aspect comes from the timezone, which (fortunately) is only present at the end of the string, + * at any point after a time field. + * It also does not use exceptions, instead returning {@code null} where a value cannot be parsed. + */ + private Result parse(CharSequence str, @Nullable ZoneId defaultTimezone) { + int len = str.length(); + + // YEARS + Integer years = parseInt(str, 0, 4); + if (years == null) return Result.error(0); + if (len == 4) { + return isOptional(ChronoField.MONTH_OF_YEAR) + ? new Result( + withZoneOffset( + years, + defaults.get(ChronoField.MONTH_OF_YEAR), + defaults.get(ChronoField.DAY_OF_MONTH), + defaults.get(ChronoField.HOUR_OF_DAY), + defaults.get(ChronoField.MINUTE_OF_HOUR), + defaults.get(ChronoField.SECOND_OF_MINUTE), + defaults.get(ChronoField.NANO_OF_SECOND), + defaultTimezone + ) + ) + : Result.error(4); + } + + if (str.charAt(4) != '-') return Result.error(4); + + // MONTHS + Integer months = parseInt(str, 5, 7); + if (months == null || months > 12) return Result.error(5); + if (len == 7) { + return isOptional(ChronoField.DAY_OF_MONTH) + ? new Result( + withZoneOffset( + years, + months, + defaults.get(ChronoField.DAY_OF_MONTH), + defaults.get(ChronoField.HOUR_OF_DAY), + defaults.get(ChronoField.MINUTE_OF_HOUR), + defaults.get(ChronoField.SECOND_OF_MINUTE), + defaults.get(ChronoField.NANO_OF_SECOND), + defaultTimezone + ) + ) + : Result.error(7); + } + + if (str.charAt(7) != '-') return Result.error(7); + + // DAYS + Integer days = parseInt(str, 8, 10); + if (days == null || days > 31) return Result.error(8); + if (len == 10) { + return optionalTime || isOptional(ChronoField.HOUR_OF_DAY) + ? new Result( + withZoneOffset( + years, + months, + days, + defaults.get(ChronoField.HOUR_OF_DAY), + defaults.get(ChronoField.MINUTE_OF_HOUR), + defaults.get(ChronoField.SECOND_OF_MINUTE), + defaults.get(ChronoField.NANO_OF_SECOND), + defaultTimezone + ) + ) + : Result.error(10); + } + + if (str.charAt(10) != 'T') return Result.error(10); + if (len == 11) { + return isOptional(ChronoField.HOUR_OF_DAY) + ? new Result( + withZoneOffset( + years, + months, + days, + defaults.get(ChronoField.HOUR_OF_DAY), + defaults.get(ChronoField.MINUTE_OF_HOUR), + defaults.get(ChronoField.SECOND_OF_MINUTE), + defaults.get(ChronoField.NANO_OF_SECOND), + defaultTimezone + ) + ) + : Result.error(11); + } + + // HOURS + timezone + Integer hours = parseInt(str, 11, 13); + if (hours == null || hours > 23) return Result.error(11); + if (len == 13) { + return isOptional(ChronoField.MINUTE_OF_HOUR) + ? new Result( + withZoneOffset( + years, + months, + days, + hours, + defaultZero(ChronoField.MINUTE_OF_HOUR), + defaultZero(ChronoField.SECOND_OF_MINUTE), + defaultZero(ChronoField.NANO_OF_SECOND), + defaultTimezone + ) + ) + : Result.error(13); + } + if (isZoneId(str, 13)) { + ZoneId timezone = parseZoneId(str, 13); + return timezone != null && isOptional(ChronoField.MINUTE_OF_HOUR) + ? new Result( + withZoneOffset( + years, + months, + days, + hours, + defaultZero(ChronoField.MINUTE_OF_HOUR), + defaultZero(ChronoField.SECOND_OF_MINUTE), + defaultZero(ChronoField.NANO_OF_SECOND), + timezone + ) + ) + : Result.error(13); + } + + if (str.charAt(13) != ':') return Result.error(13); + + // MINUTES + timezone + Integer minutes = parseInt(str, 14, 16); + if (minutes == null || minutes > 59) return Result.error(14); + if (len == 16) { + return isOptional(ChronoField.SECOND_OF_MINUTE) + ? new Result( + withZoneOffset( + years, + months, + days, + hours, + minutes, + defaultZero(ChronoField.SECOND_OF_MINUTE), + defaultZero(ChronoField.NANO_OF_SECOND), + defaultTimezone + ) + ) + : Result.error(16); + } + if (isZoneId(str, 16)) { + ZoneId timezone = parseZoneId(str, 16); + return timezone != null && isOptional(ChronoField.SECOND_OF_MINUTE) + ? new Result( + withZoneOffset( + years, + months, + days, + hours, + minutes, + defaultZero(ChronoField.SECOND_OF_MINUTE), + defaultZero(ChronoField.NANO_OF_SECOND), + timezone + ) + ) + : Result.error(16); + } + + if (str.charAt(16) != ':') return Result.error(16); + + // SECONDS + timezone + Integer seconds = parseInt(str, 17, 19); + if (seconds == null || seconds > 59) return Result.error(17); + if (len == 19) { + return new Result( + withZoneOffset(years, months, days, hours, minutes, seconds, defaultZero(ChronoField.NANO_OF_SECOND), defaultTimezone) + ); + } + if (isZoneId(str, 19)) { + ZoneId timezone = parseZoneId(str, 19); + return timezone != null + ? new Result( + withZoneOffset(years, months, days, hours, minutes, seconds, defaultZero(ChronoField.NANO_OF_SECOND), timezone) + ) + : Result.error(19); + } + + char decSeparator = str.charAt(19); + if (decSeparator != '.' && decSeparator != ',') return Result.error(19); + + // NANOS + timezone + // nanos are always optional + // the last number could be millis or nanos, or any combination in the middle + // so we keep parsing numbers until we get to not a number + int nanos = 0; + int pos; + for (pos = 20; pos < len && pos < 29; pos++) { + char c = str.charAt(pos); + if (c < ZERO || c > NINE) break; + nanos = nanos * 10 + (c - ZERO); + } + + if (pos == 20) return Result.error(20); // didn't find a number at all + + // multiply it by the correct multiplicand to get the nanos + nanos *= NANO_MULTIPLICANDS[29 - pos]; + + if (len == pos) { + return new Result(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, defaultTimezone)); + } + if (isZoneId(str, pos)) { + ZoneId timezone = parseZoneId(str, pos); + return timezone != null + ? new Result(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, timezone)) + : Result.error(pos); + } + + // still chars left at the end - string is not valid + return Result.error(pos); + } + + private static boolean isZoneId(CharSequence str, int pos) { + // all region zoneIds must start with [A-Za-z] (see ZoneId#of) + // this also covers Z and UT/UTC/GMT zone variants + char c = str.charAt(pos); + return c == '+' || c == '-' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } + + /** + * This parses the zone offset, which is of the format accepted by {@link java.time.ZoneId#of(String)}. + * It has fast paths for numerical offsets, but falls back on {@code ZoneId.of} for non-trivial zone ids. + */ + private ZoneId parseZoneId(CharSequence str, int pos) { + int len = str.length(); + char first = str.charAt(pos); + + if (first == 'Z' && len == pos + 1) { + return ZoneOffset.UTC; + } + + boolean positive; + switch (first) { + case '+' -> positive = true; + case '-' -> positive = false; + default -> { + // non-trivial zone offset, fallback on the built-in java zoneid parser + try { + return ZoneId.of(str.subSequence(pos, str.length()).toString()); + } catch (DateTimeException e) { + return null; + } + } + } + pos++; // read the + or - + + Integer hours = parseInt(str, pos, pos += 2); + if (hours == null) return null; + if (len == pos) return ofHoursMinutesSeconds(hours, 0, 0, positive); + + boolean hasColon = false; + if (str.charAt(pos) == ':') { + pos++; + hasColon = true; + } + + Integer minutes = parseInt(str, pos, pos += 2); + if (minutes == null) return null; + if (len == pos) return ofHoursMinutesSeconds(hours, minutes, 0, positive); + + // either both dividers have a colon, or neither do + if ((str.charAt(pos) == ':') != hasColon) return null; + if (hasColon) { + pos++; + } + + Integer seconds = parseInt(str, pos, pos += 2); + if (seconds == null) return null; + if (len == pos) return ofHoursMinutesSeconds(hours, minutes, seconds, positive); + + // there's some text left over... + return null; + } + + /* + * ZoneOffset.ofTotalSeconds has a ConcurrentHashMap cache of offsets. This is fine, + * but it does mean there's an expensive map lookup every time we call ofTotalSeconds. + * There's no way to get round that, but we can at least have a very quick last-value cache here + * to avoid doing a full map lookup when there's lots of timestamps with the same offset being parsed + */ + private final ThreadLocal lastOffset = ThreadLocal.withInitial(() -> ZoneOffset.UTC); + + private ZoneOffset ofHoursMinutesSeconds(int hours, int minutes, int seconds, boolean positive) { + int totalSeconds = hours * 3600 + minutes * 60 + seconds; + if (positive == false) { + totalSeconds = -totalSeconds; + } + + // check the lastOffset value + ZoneOffset lastOffset = this.lastOffset.get(); + if (totalSeconds == lastOffset.getTotalSeconds()) { + return lastOffset; + } + + try { + ZoneOffset offset = ZoneOffset.ofTotalSeconds(totalSeconds); + this.lastOffset.set(lastOffset); + return offset; + } catch (DateTimeException e) { + // zoneoffset is out of range + return null; + } + } + + /** + * Create a {@code DateTime} object, with the ZoneOffset field set when the zone is an offset, not just an id. + */ + private static DateTime withZoneOffset( + int years, + Integer months, + Integer days, + Integer hours, + Integer minutes, + Integer seconds, + Integer nanos, + ZoneId zoneId + ) { + if (zoneId instanceof ZoneOffset zo) { + return new DateTime(years, months, days, hours, minutes, seconds, nanos, zoneId, zo); + } else { + return new DateTime(years, months, days, hours, minutes, seconds, nanos, zoneId, null); + } + } + + private static final char ZERO = '0'; + private static final char NINE = '9'; + + private static Integer parseInt(CharSequence str, int startInclusive, int endExclusive) { + if (str.length() < endExclusive) return null; + + int result = 0; + for (int i = startInclusive; i < endExclusive; i++) { + char c = str.charAt(i); + if (c < ZERO || c > NINE) return null; + result = result * 10 + (c - ZERO); + } + return result; + } +} diff --git a/server/src/main/java/org/elasticsearch/common/time/JavaDateFormatter.java b/server/src/main/java/org/elasticsearch/common/time/JavaDateFormatter.java index 9c39ee51276d7..707b07c1d68d9 100644 --- a/server/src/main/java/org/elasticsearch/common/time/JavaDateFormatter.java +++ b/server/src/main/java/org/elasticsearch/common/time/JavaDateFormatter.java @@ -21,15 +21,21 @@ import java.util.Collections; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; import java.util.function.UnaryOperator; +import static java.util.Map.entry; + class JavaDateFormatter implements DateFormatter { @SuppressWarnings("unchecked") private static T defaultRoundUp(T parser) { if (parser instanceof JavaTimeDateTimeParser jtp) { return (T) defaultRoundUp(jtp); } + if (parser instanceof Iso8601DateTimeParser iso) { + return (T) defaultRoundUp(iso); + } throw new IllegalArgumentException("Unknown parser implementation " + parser.getClass()); } @@ -78,6 +84,19 @@ private static JavaTimeDateTimeParser defaultRoundUp(JavaTimeDateTimeParser pars return new JavaTimeDateTimeParser(builder.toFormatter(parser.getLocale())); } + private static Iso8601DateTimeParser defaultRoundUp(Iso8601DateTimeParser parser) { + return parser.withDefaults( + Map.ofEntries( + entry(ChronoField.MONTH_OF_YEAR, 1), + entry(ChronoField.DAY_OF_MONTH, 1), + entry(ChronoField.HOUR_OF_DAY, 23), + entry(ChronoField.MINUTE_OF_HOUR, 59), + entry(ChronoField.SECOND_OF_MINUTE, 59), + entry(ChronoField.NANO_OF_SECOND, 999_999_999) + ) + ); + } + private final String format; private final DateTimePrinter printer; private final DateTimeParser[] parsers; diff --git a/server/src/test/java/org/elasticsearch/common/time/Iso8601ParserTests.java b/server/src/test/java/org/elasticsearch/common/time/Iso8601ParserTests.java new file mode 100644 index 0000000000000..bfb03ea9496e5 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/time/Iso8601ParserTests.java @@ -0,0 +1,427 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.time; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matcher; + +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import java.time.format.ResolverStyle; +import java.time.format.SignStyle; +import java.time.temporal.ChronoField; +import java.time.temporal.TemporalAccessor; +import java.time.temporal.TemporalQueries; +import java.time.temporal.ValueRange; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +import static java.time.temporal.ChronoField.DAY_OF_MONTH; +import static java.time.temporal.ChronoField.HOUR_OF_DAY; +import static java.time.temporal.ChronoField.MINUTE_OF_HOUR; +import static java.time.temporal.ChronoField.MONTH_OF_YEAR; +import static java.time.temporal.ChronoField.NANO_OF_SECOND; +import static java.time.temporal.ChronoField.SECOND_OF_MINUTE; +import static java.time.temporal.ChronoField.YEAR; +import static org.elasticsearch.test.LambdaMatchers.transformedMatch; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; + +public class Iso8601ParserTests extends ESTestCase { + + private static Iso8601Parser defaultParser() { + return new Iso8601Parser(Set.of(), true, Map.of()); + } + + private static Matcher hasResult(DateTime dateTime) { + return transformedMatch(Iso8601Parser.Result::result, equalTo(dateTime)); + } + + private static Matcher hasError(int parseError) { + return transformedMatch(Iso8601Parser.Result::errorIndex, equalTo(parseError)); + } + + public void testStrangeParses() { + assertThat(defaultParser().tryParse("-9999-01-01", null), hasResult(new DateTime(-9999, 1, 1, null, null, null, null, null, null))); + assertThat(defaultParser().tryParse("1000", null), hasResult(new DateTime(1000, null, null, null, null, null, null, null, null))); + assertThat(defaultParser().tryParse("2023-02-02T", null), hasResult(new DateTime(2023, 2, 2, null, null, null, null, null, null))); + + // these are accepted by the previous formatters, but are not valid ISO8601 + assertThat(defaultParser().tryParse("2023-01-01T12:00:00.01,02", null), hasError(22)); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00Europe/Paris+0400", null), hasError(19)); + } + + public void testOutOfRange() { + assertThat(defaultParser().tryParse("2023-13-12", null), hasError(5)); + assertThat(defaultParser().tryParse("2023-12-32", null), hasError(8)); + assertThat(defaultParser().tryParse("2023-12-31T24", null), hasError(11)); + assertThat(defaultParser().tryParse("2023-12-31T23:60", null), hasError(14)); + assertThat(defaultParser().tryParse("2023-12-31T23:59:60", null), hasError(17)); + assertThat(defaultParser().tryParse("2023-12-31T23:59:59+18:30", null), hasError(19)); + } + + public void testMandatoryFields() { + assertThat( + new Iso8601Parser(Set.of(YEAR), true, Map.of()).tryParse("2023", null), + hasResult(new DateTime(2023, null, null, null, null, null, null, null, null)) + ); + assertThat(new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR), true, Map.of()).tryParse("2023", null), hasError(4)); + + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR), true, Map.of()).tryParse("2023-06", null), + hasResult(new DateTime(2023, 6, null, null, null, null, null, null, null)) + ); + assertThat(new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH), true, Map.of()).tryParse("2023-06", null), hasError(7)); + + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH), true, Map.of()).tryParse("2023-06-20", null), + hasResult(new DateTime(2023, 6, 20, null, null, null, null, null, null)) + ); + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY), false, Map.of()).tryParse("2023-06-20", null), + hasError(10) + ); + + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY), false, Map.of()).tryParse("2023-06-20T15", null), + hasResult(new DateTime(2023, 6, 20, 15, 0, 0, 0, null, null)) + ); + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR), false, Map.of()).tryParse( + "2023-06-20T15", + null + ), + hasError(13) + ); + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR), false, Map.of()).tryParse( + "2023-06-20T15Z", + null + ), + hasError(13) + ); + + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR), false, Map.of()).tryParse( + "2023-06-20T15:48", + null + ), + hasResult(new DateTime(2023, 6, 20, 15, 48, 0, 0, null, null)) + ); + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), false, Map.of()) + .tryParse("2023-06-20T15:48", null), + hasError(16) + ); + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), false, Map.of()) + .tryParse("2023-06-20T15:48Z", null), + hasError(16) + ); + + assertThat( + new Iso8601Parser(Set.of(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), false, Map.of()) + .tryParse("2023-06-20T15:48:09", null), + hasResult(new DateTime(2023, 6, 20, 15, 48, 9, 0, null, null)) + ); + } + + public void testParseNanos() { + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00.5", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 500_000_000, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00,5", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 500_000_000, null, null)) + ); + + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00.05", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 50_000_000, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00,005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 5_000_000, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00.0005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 500_000, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00,00005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 50_000, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00.000005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 5_000, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00,0000005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 500, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00.00000005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 50, null, null)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00,000000005", null), + hasResult(new DateTime(2023, 1, 1, 12, 0, 0, 5, null, null)) + ); + + // too many nanos + assertThat(defaultParser().tryParse("2023-01-01T12:00:00.0000000005", null), hasError(29)); + } + + private static Matcher hasTimezone(ZoneId offset) { + return transformedMatch(r -> r.result().query(TemporalQueries.zone()), equalTo(offset)); + } + + public void testParseTimezones() { + // using defaults + assertThat(defaultParser().tryParse("2023-01-01T12:00:00", null), hasTimezone(null)); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00", ZoneOffset.UTC), hasTimezone(ZoneOffset.UTC)); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00", ZoneOffset.ofHours(-3)), hasTimezone(ZoneOffset.ofHours(-3))); + + // timezone specified + assertThat(defaultParser().tryParse("2023-01-01T12:00:00Z", null), hasTimezone(ZoneOffset.UTC)); + + assertThat(defaultParser().tryParse("2023-01-01T12:00:00-05", null), hasTimezone(ZoneOffset.ofHours(-5))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00+11", null), hasTimezone(ZoneOffset.ofHours(11))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00+0830", null), hasTimezone(ZoneOffset.ofHoursMinutes(8, 30))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00-0415", null), hasTimezone(ZoneOffset.ofHoursMinutes(-4, -15))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00+08:30", null), hasTimezone(ZoneOffset.ofHoursMinutes(8, 30))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00-04:15", null), hasTimezone(ZoneOffset.ofHoursMinutes(-4, -15))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00+011030", null), hasTimezone(ZoneOffset.ofHoursMinutesSeconds(1, 10, 30))); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00-074520", null), + hasTimezone(ZoneOffset.ofHoursMinutesSeconds(-7, -45, -20)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00+01:10:30", null), + hasTimezone(ZoneOffset.ofHoursMinutesSeconds(1, 10, 30)) + ); + assertThat( + defaultParser().tryParse("2023-01-01T12:00:00-07:45:20", null), + hasTimezone(ZoneOffset.ofHoursMinutesSeconds(-7, -45, -20)) + ); + + assertThat(defaultParser().tryParse("2023-01-01T12:00:00GMT", null), hasTimezone(ZoneId.of("GMT"))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00UTC", null), hasTimezone(ZoneId.of("UTC"))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00UT", null), hasTimezone(ZoneId.of("UT"))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00GMT+3", null), hasTimezone(ZoneId.of("GMT+3"))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00UTC-4", null), hasTimezone(ZoneId.of("UTC-4"))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00UT+6", null), hasTimezone(ZoneId.of("UT+6"))); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00Europe/Paris", null), hasTimezone(ZoneId.of("Europe/Paris"))); + + // we could be more specific in the error index for invalid timezones, + // but that would require keeping track & propagating Result objects within date-time parsing just for the ZoneId + assertThat(defaultParser().tryParse("2023-01-01T12:00:00+04:0030", null), hasError(19)); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00+0400:30", null), hasError(19)); + assertThat(defaultParser().tryParse("2023-01-01T12:00:00Invalid", null), hasError(19)); + } + + private static void assertEquivalent(String text, DateTimeFormatter formatter) { + TemporalAccessor expected = formatter.parse(text); + TemporalAccessor actual = defaultParser().tryParse(text, null).result(); + assertThat(actual, is(notNullValue())); + + assertThat(actual.query(TemporalQueries.localDate()), equalTo(expected.query(TemporalQueries.localDate()))); + assertThat(actual.query(TemporalQueries.localTime()), equalTo(expected.query(TemporalQueries.localTime()))); + assertThat(actual.query(TemporalQueries.zone()), equalTo(expected.query(TemporalQueries.zone()))); + } + + private static void assertEquivalentFailure(String text, DateTimeFormatter formatter) { + DateTimeParseException expected = expectThrows(DateTimeParseException.class, () -> formatter.parse(text)); + int error = defaultParser().tryParse(text, null).errorIndex(); + assertThat(error, greaterThanOrEqualTo(0)); + + assertThat(error, equalTo(expected.getErrorIndex())); + } + + public void testEquivalence() { + // test that Iso8601Parser produces the same output as DateTimeFormatter + DateTimeFormatter mandatoryFormatter = new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .optionalStart() + .appendZoneOrOffsetId() + .optionalEnd() + .optionalStart() + .appendOffset("+HHmm", "Z") + .optionalEnd() + .toFormatter(Locale.ROOT) + .withResolverStyle(ResolverStyle.STRICT); + + // just checking timezones/ids here + assertEquivalent("2023-01-01T12:00:00", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00Z", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00UT", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00UTC", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00GMT", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00+00", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00-00", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00+05", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00+0500", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00+05:00", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00+05:00:30", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00-07", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00-0715", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00-07:15", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00UTC+05:00", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00GMT-09:45:30", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00Zulu", mandatoryFormatter); + assertEquivalent("2023-01-01T12:00:00Europe/Paris", mandatoryFormatter); + + assertEquivalentFailure("2023-01-01T12:00:00+5", mandatoryFormatter); + assertEquivalentFailure("2023-01-01T12:00:00-7", mandatoryFormatter); + assertEquivalentFailure("2023-01-01T12:00:00InvalidTimeZone", mandatoryFormatter); + + DateTimeFormatter allFieldsOptional = new DateTimeFormatterBuilder().appendValue(YEAR, 4, 4, SignStyle.EXCEEDS_PAD) + .optionalStart() + .appendLiteral('-') + .appendValue(MONTH_OF_YEAR, 2) + .optionalStart() + .appendLiteral('-') + .appendValue(DAY_OF_MONTH, 2) + .optionalStart() + .appendLiteral('T') + .appendValue(HOUR_OF_DAY, 2) + .optionalStart() + .appendLiteral(':') + .appendValue(MINUTE_OF_HOUR, 2) + .optionalStart() + .appendLiteral(':') + .appendValue(SECOND_OF_MINUTE, 2) + .optionalEnd() + .optionalEnd() + .optionalEnd() + .optionalEnd() + .optionalEnd() + .optionalStart() + .appendZoneOrOffsetId() + .optionalEnd() + .optionalStart() + .appendOffset("+HHmm", "Z") + .optionalEnd() + .toFormatter(Locale.ROOT) + .withResolverStyle(ResolverStyle.STRICT); + + assertEquivalent("2023", allFieldsOptional); + assertEquivalent("2023-04", allFieldsOptional); + assertEquivalent("2023-04-08", allFieldsOptional); + assertEquivalent("2023-04-08T13", allFieldsOptional); + assertEquivalent("2023-04-08T13:45", allFieldsOptional); + assertEquivalent("2023-04-08T13:45:50", allFieldsOptional); + assertEquivalent("-2023-04-08T13:45:50", allFieldsOptional); + } + + private static int randomValue(ValueRange range) { + assert range.isIntValue(); + return randomIntBetween((int) range.getMinimum(), (int) range.getMaximum()); + } + + public void testDefaults() { + Map defaults = Map.of( + MONTH_OF_YEAR, + randomValue(MONTH_OF_YEAR.range()), + DAY_OF_MONTH, + randomValue(DAY_OF_MONTH.range()), + HOUR_OF_DAY, + randomValue(HOUR_OF_DAY.range()), + MINUTE_OF_HOUR, + randomValue(MINUTE_OF_HOUR.range()), + SECOND_OF_MINUTE, + randomValue(SECOND_OF_MINUTE.range()), + NANO_OF_SECOND, + randomValue(NANO_OF_SECOND.range()) + ); + + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023", null), + hasResult( + new DateTime( + 2023, + defaults.get(MONTH_OF_YEAR), + defaults.get(DAY_OF_MONTH), + defaults.get(HOUR_OF_DAY), + defaults.get(MINUTE_OF_HOUR), + defaults.get(SECOND_OF_MINUTE), + defaults.get(NANO_OF_SECOND), + null, + null + ) + ) + ); + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023-01", null), + hasResult( + new DateTime( + 2023, + 1, + defaults.get(DAY_OF_MONTH), + defaults.get(HOUR_OF_DAY), + defaults.get(MINUTE_OF_HOUR), + defaults.get(SECOND_OF_MINUTE), + defaults.get(NANO_OF_SECOND), + null, + null + ) + ) + ); + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023-01-01", null), + hasResult( + new DateTime( + 2023, + 1, + 1, + defaults.get(HOUR_OF_DAY), + defaults.get(MINUTE_OF_HOUR), + defaults.get(SECOND_OF_MINUTE), + defaults.get(NANO_OF_SECOND), + null, + null + ) + ) + ); + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023-01-01T00", null), + hasResult( + new DateTime( + 2023, + 1, + 1, + 0, + defaults.get(MINUTE_OF_HOUR), + defaults.get(SECOND_OF_MINUTE), + defaults.get(NANO_OF_SECOND), + null, + null + ) + ) + ); + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023-01-01T00:00", null), + hasResult(new DateTime(2023, 1, 1, 0, 0, defaults.get(SECOND_OF_MINUTE), defaults.get(NANO_OF_SECOND), null, null)) + ); + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023-01-01T00:00:00", null), + hasResult(new DateTime(2023, 1, 1, 0, 0, 0, defaults.get(NANO_OF_SECOND), null, null)) + ); + assertThat( + new Iso8601Parser(Set.of(), true, defaults).tryParse("2023-01-01T00:00:00.0", null), + hasResult(new DateTime(2023, 1, 1, 0, 0, 0, 0, null, null)) + ); + } +}