Create a custom parser for parsing ISO8601 datetime variants (elastic#106486)

This adds a hand-written parser for parsing fixed ISO8601 datetime strings, for the `iso8601`, `strict_date_optional_time`, and `strict_date_optional_time_nanos` date formats. If the new parser fails to parse a string, the existing parsers are then tried, so existing behaviour is maintained. There is a new JVM option added that can force use of the existing parsers, if that is needed for any reason.
danielmitterdorfer · May 14, 2024 · e01600d · e01600d
1 parent 3fe4785
commit e01600d
Show file tree

Hide file tree

Showing 8 changed files with 1,371 additions and 44 deletions.
diff --git a/docs/changelog/106486.yaml b/docs/changelog/106486.yaml
@@ -0,0 +1,17 @@
+pr: 106486
+summary: Create custom parser for ISO-8601 datetimes
+area: Infra/Core
+type: enhancement
+issues:
+  - 102063
+highlight:
+  title: New custom parser for ISO-8601 datetimes
+  body: |-
+    This introduces a new custom parser for ISO-8601 datetimes, for the `iso8601`, `strict_date_optional_time`, and
+    `strict_date_optional_time_nanos` built-in date formats. This provides a performance improvement over the
+    default Java date-time parsing. Whilst it maintains much of the same behaviour,
+    the new parser does not accept nonsensical date-time strings that have multiple fractional seconds fields
+    or multiple timezone specifiers. If the new parser fails to parse a string, it will then use the previous parser
+    to parse it. If a large proportion of the input data consists of these invalid strings, this may cause
+    a small performance degradation. If you wish to force the use of the old parsers regardless,
+    set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes.
diff --git a/server/src/main/java/org/elasticsearch/common/time/CharSubSequence.java b/server/src/main/java/org/elasticsearch/common/time/CharSubSequence.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.common.time;
+
+import java.util.stream.IntStream;
+
+/**
+ * A read-only window onto the range {@code [startOffset, endOffset)} of another {@link CharSequence}.
+ * <p>
+ * {@link #length()}, {@link #isEmpty()} and {@link #charAt(int)} are answered from the stored offsets and the wrapped
+ * sequence, without copying any characters. {@link #subSequence(int, int)} and {@link #toString()} delegate to the
+ * wrapped sequence's own {@code subSequence}, so whether those calls copy depends on the wrapped implementation.
+ */
+class CharSubSequence implements CharSequence {
+    private final CharSequence wrapped;
+    private final int startOffset;    // inclusive
+    private final int endOffset;      // exclusive
+
+    /**
+     * @param wrapped     the sequence to expose a window of
+     * @param startOffset index in {@code wrapped} of the first character of the window (inclusive)
+     * @param endOffset   index in {@code wrapped} just past the last character of the window (exclusive)
+     * @throws IllegalArgumentException if {@code startOffset} is negative, {@code endOffset} is beyond the end of
+     *                                  {@code wrapped}, or {@code endOffset} is less than {@code startOffset}
+     */
+    CharSubSequence(CharSequence wrapped, int startOffset, int endOffset) {
+        if (startOffset < 0) {
+            throw new IllegalArgumentException("startOffset [" + startOffset + "] must not be negative");
+        }
+        if (endOffset > wrapped.length()) {
+            throw new IllegalArgumentException(
+                "endOffset [" + endOffset + "] must not be greater than the wrapped length [" + wrapped.length() + "]"
+            );
+        }
+        if (endOffset < startOffset) {
+            throw new IllegalArgumentException(
+                "endOffset [" + endOffset + "] must not be less than startOffset [" + startOffset + "]"
+            );
+        }
+
+        this.wrapped = wrapped;
+        this.startOffset = startOffset;
+        this.endOffset = endOffset;
+    }
+
+    /** Returns the number of characters in the window. */
+    @Override
+    public int length() {
+        return endOffset - startOffset;
+    }
+
+    /**
+     * Returns the character at {@code index}, counted from the start of the window.
+     *
+     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than {@link #length()}
+     */
+    @Override
+    public char charAt(int index) {
+        int adjustedIndex = index + startOffset;
+        // the lower-bound check also rejects a very large index whose sum overflowed to a negative value
+        if (adjustedIndex < startOffset || adjustedIndex >= endOffset) {
+            throw new IndexOutOfBoundsException(index);
+        }
+        return wrapped.charAt(adjustedIndex);
+    }
+
+    /** Returns {@code true} if the window contains no characters. */
+    @Override
+    public boolean isEmpty() {
+        return startOffset == endOffset;
+    }
+
+    /**
+     * Returns the characters between {@code start} (inclusive) and {@code end} (exclusive), both relative to this window,
+     * as produced by the wrapped sequence's {@code subSequence}.
+     *
+     * @throws IndexOutOfBoundsException if {@code start} is negative, {@code end} is greater than {@link #length()},
+     *                                   or {@code start} is greater than {@code end}
+     */
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        int adjustedStart = start + startOffset;
+        int adjustedEnd = end + startOffset;
+        if (adjustedStart < startOffset) {
+            throw new IndexOutOfBoundsException(start);
+        }
+        if (adjustedEnd > endOffset) {
+            throw new IndexOutOfBoundsException(end);
+        }
+        if (adjustedStart > adjustedEnd) {
+            throw new IndexOutOfBoundsException("start [" + start + "] must not be greater than end [" + end + "]");
+        }
+
+        return wrapped.subSequence(adjustedStart, adjustedEnd);
+    }
+
+    /** Returns a stream of the {@code char} values in the window, taken from the wrapped sequence's own stream. */
+    @Override
+    public IntStream chars() {
+        return wrapped.chars().skip(startOffset).limit(endOffset - startOffset);
+    }
+
+    /** Returns the window's characters as a {@link String}. */
+    @Override
+    public String toString() {
+        return wrapped.subSequence(startOffset, endOffset).toString();
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/common/time/DateFormatters.java b/server/src/main/java/org/elasticsearch/common/time/DateFormatters.java
@@ -9,7 +9,10 @@
 package org.elasticsearch.common.time;
 
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.core.Booleans;
 import org.elasticsearch.core.SuppressForbidden;
+import org.elasticsearch.core.UpdateForV9;
+import org.elasticsearch.logging.internal.spi.LoggerFactory;
 
 import java.time.Instant;
 import java.time.LocalDate;
@@ -30,6 +33,7 @@
 import java.time.temporal.TemporalQuery;
 import java.time.temporal.WeekFields;
 import java.util.Locale;
+import java.util.Set;
 import java.util.stream.Stream;
 
 import static java.time.temporal.ChronoField.DAY_OF_MONTH;
@@ -43,6 +47,24 @@
 
 public class DateFormatters {
 
+    /**
+     * The ISO8601 parser is as close as possible to the java.time based parsers, but there are some strings
+     * that are no longer accepted (multiple fractional seconds, or multiple timezones) by the ISO parser.
+     * If a string cannot be parsed by the ISO parser, it then tries the java.time one.
+     * If there are lots of these strings, trying the ISO parser and then the java.time parser might cause a performance drop.
+     * So provide a JVM option so that users can just use the java.time parsers, if they really need to.
+     */
+    @UpdateForV9    // evaluate if we need to deprecate/remove this
+    private static final boolean JAVA_TIME_PARSERS_ONLY = Booleans.parseBoolean(System.getProperty("es.datetime.java_time_parsers"), false);
+
+    static {
+        // Announce once, at class initialization, that the java.time-only mode was selected.
+        if (JAVA_TIME_PARSERS_ONLY) {
+            // when this is used directly in tests ES logging may not have been initialized yet
+            LoggerFactory loggerFactory = LoggerFactory.provider();
+            if (loggerFactory != null) {
+                loggerFactory.getLogger(DateFormatters.class).info("Using java.time datetime parsers only");
+            }
+        }
+    }
+
     /**
      * Builds a {@link JavaDateFormatter} named {@code format} that uses the same java.time {@code formatter}
      * for both printing and parsing.
      */
     private static DateFormatter newDateFormatter(String format, DateTimeFormatter formatter) {
         JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(formatter);
         JavaTimeDateTimeParser parser = new JavaTimeDateTimeParser(formatter);
         return new JavaDateFormatter(format, printer, parser);
     }
@@ -168,11 +190,18 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p
     /**
      * Returns a generic ISO datetime parser where the date is mandatory and the time is optional.
      */
-    private static final DateFormatter STRICT_DATE_OPTIONAL_TIME = newDateFormatter(
-        "strict_date_optional_time",
-        STRICT_DATE_OPTIONAL_TIME_PRINTER,
-        STRICT_DATE_OPTIONAL_TIME_FORMATTER
-    );
+    private static final DateFormatter STRICT_DATE_OPTIONAL_TIME;
+    static {
+        // java.time-based parser: the only parser when JAVA_TIME_PARSERS_ONLY is set, otherwise listed second
+        final DateTimeParser fallbackParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER);
+        final JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER);
+
+        final DateTimeParser[] parsers;
+        if (JAVA_TIME_PARSERS_ONLY) {
+            parsers = new DateTimeParser[] { fallbackParser };
+        } else {
+            // hand-written ISO8601 parser goes first, ahead of the java.time one
+            final DateTimeParser isoParser = new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT);
+            parsers = new DateTimeParser[] { isoParser, fallbackParser };
+        }
+
+        STRICT_DATE_OPTIONAL_TIME = new JavaDateFormatter("strict_date_optional_time", printer, parsers);
+    }
 
     private static final DateTimeFormatter STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS = new DateTimeFormatterBuilder().append(
         STRICT_YEAR_MONTH_DAY_FORMATTER
@@ -224,51 +253,69 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p
     /**
      * Returns a generic ISO datetime parser where the date is mandatory and the time is optional with nanosecond resolution.
      */
-    private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS = newDateFormatter(
-        "strict_date_optional_time_nanos",
-        STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS,
-        STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS
-    );
+    private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS;
+    static {
+        // java.time-based parser: the only parser when JAVA_TIME_PARSERS_ONLY is set, otherwise listed second
+        final DateTimeParser fallbackParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS);
+        final JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS);
+
+        final DateTimeParser[] parsers;
+        if (JAVA_TIME_PARSERS_ONLY) {
+            parsers = new DateTimeParser[] { fallbackParser };
+        } else {
+            // hand-written ISO8601 parser goes first, ahead of the java.time one
+            final DateTimeParser isoParser = new Iso8601DateTimeParser(Set.of(HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), true)
+                .withLocale(Locale.ROOT);
+            parsers = new DateTimeParser[] { isoParser, fallbackParser };
+        }
+
+        STRICT_DATE_OPTIONAL_TIME_NANOS = new JavaDateFormatter("strict_date_optional_time_nanos", printer, parsers);
+    }
 
     /**
      * Returns a ISO 8601 compatible date time formatter and parser.
      * This is not fully compatible to the existing spec, which would require far more edge cases, but merely compatible with the
      * existing legacy joda time ISO date formatter
      */
-    private static final DateFormatter ISO_8601 = newDateFormatter(
-        "iso8601",
-        STRICT_DATE_OPTIONAL_TIME_PRINTER,
-        new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER)
-            .optionalStart()
-            .appendLiteral('T')
-            .optionalStart()
-            .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE)
-            .optionalStart()
-            .appendLiteral(':')
-            .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE)
-            .optionalStart()
-            .appendLiteral(':')
-            .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE)
-            .optionalStart()
-            .appendFraction(NANO_OF_SECOND, 1, 9, true)
-            .optionalEnd()
-            .optionalStart()
-            .appendLiteral(",")
-            .appendFraction(NANO_OF_SECOND, 1, 9, false)
-            .optionalEnd()
-            .optionalEnd()
-            .optionalEnd()
-            .optionalEnd()
-            .optionalStart()
-            .appendZoneOrOffsetId()
-            .optionalEnd()
-            .optionalStart()
-            .append(TIME_ZONE_FORMATTER_NO_COLON)
-            .optionalEnd()
-            .optionalEnd()
-            .toFormatter(Locale.ROOT)
-            .withResolverStyle(ResolverStyle.STRICT)
-    );
+    private static final DateFormatter ISO_8601;
+    static {
+        // java.time grammar: a mandatory date, then an optional 'T' section holding the time-of-day
+        // (each finer-grained field nested as optional inside the coarser one) followed by an optional zone.
+        final DateTimeFormatter javaTimeFormatter = new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER)
+            .optionalStart()
+            .appendLiteral('T')
+            .optionalStart()
+            .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE)
+            .optionalStart()
+            .appendLiteral(':')
+            .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE)
+            .optionalStart()
+            .appendLiteral(':')
+            .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE)
+            // fraction introduced by the decimal point appended by appendFraction itself
+            .optionalStart()
+            .appendFraction(NANO_OF_SECOND, 1, 9, true)
+            .optionalEnd()
+            // or a fraction introduced by a literal comma
+            .optionalStart()
+            .appendLiteral(",")
+            .appendFraction(NANO_OF_SECOND, 1, 9, false)
+            .optionalEnd()
+            .optionalEnd()
+            .optionalEnd()
+            .optionalEnd()
+            // zone given as a zone/offset id
+            .optionalStart()
+            .appendZoneOrOffsetId()
+            .optionalEnd()
+            // or in the form accepted by TIME_ZONE_FORMATTER_NO_COLON
+            .optionalStart()
+            .append(TIME_ZONE_FORMATTER_NO_COLON)
+            .optionalEnd()
+            .optionalEnd()
+            .toFormatter(Locale.ROOT)
+            .withResolverStyle(ResolverStyle.STRICT);
+
+        // java.time-based parser: the only parser when JAVA_TIME_PARSERS_ONLY is set, otherwise listed second
+        final DateTimeParser fallbackParser = new JavaTimeDateTimeParser(javaTimeFormatter);
+        final JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER);
+
+        final DateTimeParser[] parsers;
+        if (JAVA_TIME_PARSERS_ONLY) {
+            parsers = new DateTimeParser[] { fallbackParser };
+        } else {
+            // hand-written ISO8601 parser goes first, ahead of the java.time one
+            final DateTimeParser isoParser = new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT);
+            parsers = new DateTimeParser[] { isoParser, fallbackParser };
+        }
+
+        ISO_8601 = new JavaDateFormatter("iso8601", printer, parsers);
+    }
 
     /////////////////////////////////////////
     //