Skip to content

Commit

Permalink
Parse composite patterns using ClassicFormat.parseObject backport(#40100
Browse files Browse the repository at this point in the history
) (#40503)

Java-time fails parsing composite patterns when first pattern matches only the prefix of the input. It expects pattern in longest to shortest order. Because of this constructing just one DateTimeFormatter with appendOptional is not sufficient. Parsers have to be iterated and if the parsing fails, the next one in order should be used. In order to not degrade performance parsing should not be throw exceptions on failure. Format.parseObject was used as it only returns null when parsing failed and allows to check if full input was read.

closes #39916
backport #40100
pgomulka authored Apr 4, 2019

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 7c66769 commit a154751
Showing 8 changed files with 126 additions and 68 deletions.
Original file line number Diff line number Diff line change
@@ -45,6 +45,13 @@ public JodaDateFormatter(String pattern, DateTimeFormatter parser, DateTimeForma
this.parser = parser;
}

/**
* Try to parse input to a java time TemporalAccessor using joda-time library.
* @see DateFormatter#parse(String)
* @throws IllegalArgumentException if the text to parse is invalid
* @throws java.time.DateTimeException if the parsing result exceeds the supported range of <code>ZoneDateTime</code>
* or if the parsed instant exceeds the maximum or minimum instant
*/
@Override
public TemporalAccessor parse(String input) {
final DateTime dt = parser.parseDateTime(input);
Original file line number Diff line number Diff line change
@@ -23,11 +23,11 @@
import org.elasticsearch.common.joda.Joda;
import org.joda.time.DateTime;

import java.time.DateTimeException;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.List;
@@ -38,8 +38,10 @@ public interface DateFormatter {
/**
* Try to parse input to a java time TemporalAccessor
* @param input An arbitrary string resembling the string representation of a date or time
* @throws DateTimeParseException If parsing fails, this exception will be thrown.
* @throws IllegalArgumentException If parsing fails, this exception will be thrown.
* Note that it can contained suppressed exceptions when several formatters failed parse this value
* @throws DateTimeException if the parsing result exceeds the supported range of <code>ZoneDateTime</code>
* or if the parsed instant exceeds the maximum or minimum instant
* @return The java time object containing the parsed input
*/
TemporalAccessor parse(String input);
Original file line number Diff line number Diff line change
@@ -1585,7 +1585,7 @@ static JavaDateFormatter merge(String pattern, List<DateFormatter> formatters) {
if (printer == null) {
printer = javaDateFormatter.getPrinter();
}
dateTimeFormatters.add(javaDateFormatter.getParser());
dateTimeFormatters.addAll(javaDateFormatter.getParsers());
roundupBuilder.appendOptional(javaDateFormatter.getRoundupParser());
}
DateTimeFormatter roundUpParser = roundupBuilder.toFormatter(Locale.ROOT);
@@ -1632,7 +1632,7 @@ public static ZonedDateTime from(TemporalAccessor accessor) {
if (zoneId == null) {
zoneId = ZoneOffset.UTC;
}

LocalDate localDate = accessor.query(TemporalQueries.localDate());
LocalTime localTime = accessor.query(TemporalQueries.localTime());
boolean isLocalDateSet = localDate != null;
Original file line number Diff line number Diff line change
@@ -21,14 +21,19 @@

import org.elasticsearch.common.Strings;

import java.text.ParsePosition;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalField;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
@@ -38,6 +43,7 @@ class JavaDateFormatter implements DateFormatter {

// base fields which should be used for default parsing, when we round up for date math
private static final Map<TemporalField, Long> ROUND_UP_BASE_FIELDS = new HashMap<>(6);

{
ROUND_UP_BASE_FIELDS.put(ChronoField.MONTH_OF_YEAR, 1L);
ROUND_UP_BASE_FIELDS.put(ChronoField.DAY_OF_MONTH, 1L);
@@ -49,22 +55,15 @@ class JavaDateFormatter implements DateFormatter {

private final String format;
private final DateTimeFormatter printer;
private final DateTimeFormatter parser;
private final List<DateTimeFormatter> parsers;
private final DateTimeFormatter roundupParser;

private JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeFormatter roundupParser, DateTimeFormatter parser) {
this.format = "8" + format;
this.printer = printer;
this.roundupParser = roundupParser;
this.parser = parser;
}

JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeFormatter... parsers) {
this(format, printer, builder -> ROUND_UP_BASE_FIELDS.forEach(builder::parseDefaulting), parsers);
}

JavaDateFormatter(String format, DateTimeFormatter printer, Consumer<DateTimeFormatterBuilder> roundupParserConsumer,
DateTimeFormatter... parsers) {
DateTimeFormatter... parsers) {
if (printer == null) {
throw new IllegalArgumentException("printer may not be null");
}
@@ -76,28 +75,23 @@ private JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeForm
if (distinctLocales > 1) {
throw new IllegalArgumentException("formatters must have the same locale");
}
this.printer = printer;
this.format = "8" + format;

if (parsers.length == 0) {
this.parser = printer;
} else if (parsers.length == 1) {
this.parser = parsers[0];
this.parsers = Collections.singletonList(printer);
} else {
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
for (DateTimeFormatter parser : parsers) {
builder.appendOptional(parser);
}
this.parser = builder.toFormatter(Locale.ROOT);
this.parsers = Arrays.asList(parsers);
}
this.format = "8" + format;
this.printer = printer;

DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
if (format.contains("||") == false) {
builder.append(this.parser);
builder.append(this.parsers.get(0));
}
roundupParserConsumer.accept(builder);
DateTimeFormatter roundupFormatter = builder.toFormatter(parser.getLocale());
DateTimeFormatter roundupFormatter = builder.toFormatter(locale());
if (printer.getZone() != null) {
roundupFormatter = roundupFormatter.withZone(printer.getZone());
roundupFormatter = roundupFormatter.withZone(zone());
}
this.roundupParser = roundupFormatter;
}
@@ -106,10 +100,6 @@ DateTimeFormatter getRoundupParser() {
return roundupParser;
}

DateTimeFormatter getParser() {
return parser;
}

DateTimeFormatter getPrinter() {
return printer;
}
@@ -119,27 +109,66 @@ public TemporalAccessor parse(String input) {
if (Strings.isNullOrEmpty(input)) {
throw new IllegalArgumentException("cannot parse empty date");
}
return parser.parse(input);

try {
return doParse(input);
} catch (DateTimeParseException e) {
throw new IllegalArgumentException("failed to parse date field [" + input + "] with format [" + format + "]", e);
}
}

/**
* Attempt parsing the input without throwing exception. If multiple parsers are provided,
* it will continue iterating if the previous parser failed. The pattern must fully match, meaning whole input was used.
* This also means that this method depends on <code>DateTimeFormatter.ClassicFormat.parseObject</code>
* which does not throw exceptions when parsing failed.
*
* The approach with collection of parsers was taken because java-time requires ordering on optional (composite)
* patterns. Joda does not suffer from this.
* https://bugs.openjdk.java.net/browse/JDK-8188771
*
* @param input An arbitrary string resembling the string representation of a date or time
* @return a TemporalAccessor if parsing was successful.
* @throws DateTimeParseException when unable to parse with any parsers
*/
private TemporalAccessor doParse(String input) {
if (parsers.size() > 1) {
for (DateTimeFormatter formatter : parsers) {
ParsePosition pos = new ParsePosition(0);
Object object = formatter.toFormat().parseObject(input, pos);
if (parsingSucceeded(object, input, pos) == true) {
return (TemporalAccessor) object;
}
}
throw new DateTimeParseException("Failed to parse with all enclosed parsers", input, 0);
}
return this.parsers.get(0).parse(input);
}

private boolean parsingSucceeded(Object object, String input, ParsePosition pos) {
return object != null && pos.getIndex() == input.length();
}

@Override
public DateFormatter withZone(ZoneId zoneId) {
// shortcurt to not create new objects unnecessarily
if (zoneId.equals(parser.getZone())) {
if (zoneId.equals(zone())) {
return this;
}

return new JavaDateFormatter(format, printer.withZone(zoneId), roundupParser.withZone(zoneId), parser.withZone(zoneId));
return new JavaDateFormatter(format, printer.withZone(zoneId),
parsers.stream().map(p -> p.withZone(zoneId)).toArray(size -> new DateTimeFormatter[size]));
}

@Override
public DateFormatter withLocale(Locale locale) {
// shortcurt to not create new objects unnecessarily
if (locale.equals(parser.getLocale())) {
if (locale.equals(locale())) {
return this;
}

return new JavaDateFormatter(format, printer.withLocale(locale), roundupParser.withLocale(locale), parser.withLocale(locale));
return new JavaDateFormatter(format, printer.withLocale(locale),
parsers.stream().map(p -> p.withLocale(locale)).toArray(size -> new DateTimeFormatter[size]));
}

@Override
@@ -164,7 +193,7 @@ public ZoneId zone() {

@Override
public DateMathParser toDateMathParser() {
return new JavaDateMathParser(format, parser, roundupParser);
return new JavaDateMathParser(format, this, getRoundupParser());
}

@Override
@@ -180,12 +209,16 @@ public boolean equals(Object obj) {
JavaDateFormatter other = (JavaDateFormatter) obj;

return Objects.equals(format, other.format) &&
Objects.equals(locale(), other.locale()) &&
Objects.equals(this.printer.getZone(), other.printer.getZone());
Objects.equals(locale(), other.locale()) &&
Objects.equals(this.printer.getZone(), other.printer.getZone());
}

@Override
public String toString() {
return String.format(Locale.ROOT, "format[%s] locale[%s]", format, locale());
}

Collection<DateTimeFormatter> getParsers() {
return parsers;
}
}
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@
import java.time.temporal.TemporalAdjusters;
import java.time.temporal.TemporalQueries;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.LongSupplier;

/**
@@ -46,11 +47,11 @@
*/
public class JavaDateMathParser implements DateMathParser {

private final DateTimeFormatter formatter;
private final JavaDateFormatter formatter;
private final DateTimeFormatter roundUpFormatter;
private final String format;

JavaDateMathParser(String format, DateTimeFormatter formatter, DateTimeFormatter roundUpFormatter) {
JavaDateMathParser(String format, JavaDateFormatter formatter, DateTimeFormatter roundUpFormatter) {
Objects.requireNonNull(formatter);
this.format = format;
this.formatter = formatter;
@@ -214,12 +215,12 @@ private long parseDateTime(String value, ZoneId timeZone, boolean roundUpIfNoTim
throw new IllegalArgumentException("cannot parse empty date");
}

DateTimeFormatter formatter = roundUpIfNoTime ? this.roundUpFormatter : this.formatter;
Function<String,TemporalAccessor> formatter = roundUpIfNoTime ? this.roundUpFormatter::parse : this.formatter::parse;
try {
if (timeZone == null) {
return DateFormatters.from(formatter.parse(value)).toInstant().toEpochMilli();
return DateFormatters.from(formatter.apply(value)).toInstant().toEpochMilli();
} else {
TemporalAccessor accessor = formatter.parse(value);
TemporalAccessor accessor = formatter.apply(value);
ZoneId zoneId = TemporalQueries.zone().queryFrom(accessor);
if (zoneId != null) {
timeZone = zoneId;
@@ -228,7 +229,8 @@ private long parseDateTime(String value, ZoneId timeZone, boolean roundUpIfNoTim
return DateFormatters.from(accessor).withZoneSameLocal(timeZone).toInstant().toEpochMilli();
}
} catch (IllegalArgumentException | DateTimeException e) {
throw new ElasticsearchParseException("failed to parse date field [{}] in format [{}]: [{}]", e, value, format, e.getMessage());
throw new ElasticsearchParseException("failed to parse date field [{}] with format [{}]: [{}]",
e, value, format, e.getMessage());
}
}
}
Original file line number Diff line number Diff line change
@@ -29,13 +29,11 @@
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.temporal.TemporalAccessor;
import java.util.Locale;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.startsWith;

public class JavaJodaTimeDuellingTests extends ESTestCase {

@@ -290,7 +288,7 @@ public void testDuellingFormatsValidParsing() {
// joda comes up with a different exception message here, so we have to adapt
assertJodaParseException("2012-W1-8", "week_date",
"Cannot parse \"2012-W1-8\": Value 8 for dayOfWeek must be in the range [1,7]");
assertJavaTimeParseException("2012-W1-8", "week_date", "Text '2012-W1-8' could not be parsed");
assertJavaTimeParseException("2012-W1-8", "week_date");

assertSameDate("2012-W48-6T10:15:30.123Z", "week_date_time");
assertSameDate("2012-W48-6T10:15:30.123456789Z", "week_date_time");
@@ -330,6 +328,17 @@ public void testDuellingFormatsValidParsing() {
assertSameDate("2012-W1-1", "weekyear_week_day");
}

public void testCompositeParsing(){
//in all these examples the second pattern will be used
assertSameDate("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SSS");
assertSameDate("2014-06-06T12:01:02.123", "strictDateTimeNoMillis||yyyy-MM-dd'T'HH:mm:ss.SSS");
assertSameDate("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss+HH:MM||yyyy-MM-dd'T'HH:mm:ss.SSS");
}

public void testExceptionWhenCompositeParsingFails(){
assertParseException("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SS");
}

public void testDuelingStrictParsing() {
assertSameDate("2018W313", "strict_basic_week_date");
assertParseException("18W313", "strict_basic_week_date");
@@ -477,7 +486,7 @@ public void testDuelingStrictParsing() {
// joda comes up with a different exception message here, so we have to adapt
assertJodaParseException("2012-W01-8", "strict_week_date",
"Cannot parse \"2012-W01-8\": Value 8 for dayOfWeek must be in the range [1,7]");
assertJavaTimeParseException("2012-W01-8", "strict_week_date", "Text '2012-W01-8' could not be parsed");
assertJavaTimeParseException("2012-W01-8", "strict_week_date");

assertSameDate("2012-W48-6T10:15:30.123Z", "strict_week_date_time");
assertSameDate("2012-W48-6T10:15:30.123456789Z", "strict_week_date_time");
@@ -624,7 +633,7 @@ public void testParsingMissingTimezone() {

private void assertSamePrinterOutput(String format, ZonedDateTime javaDate, DateTime jodaDate) {
assertThat(jodaDate.getMillis(), is(javaDate.toInstant().toEpochMilli()));
String javaTimeOut = DateFormatters.forPattern(format).format(javaDate);
String javaTimeOut = DateFormatter.forPattern(format).format(javaDate);
String jodaTimeOut = DateFormatter.forPattern(format).formatJoda(jodaDate);
if (JavaVersion.current().getVersion().get(0) == 8 && javaTimeOut.endsWith(".0")
&& (format.equals("epoch_second") || format.equals("epoch_millis"))) {
@@ -639,7 +648,7 @@ private void assertSamePrinterOutput(String format, ZonedDateTime javaDate, Date

private void assertSameDate(String input, String format) {
DateFormatter jodaFormatter = Joda.forPattern(format);
DateFormatter javaFormatter = DateFormatters.forPattern(format);
DateFormatter javaFormatter = DateFormatter.forPattern(format);
assertSameDate(input, format, jodaFormatter, javaFormatter);
}

@@ -657,7 +666,7 @@ private void assertSameDate(String input, String format, DateFormatter jodaForma

private void assertParseException(String input, String format) {
assertJodaParseException(input, format, "Invalid format: \"" + input);
assertJavaTimeParseException(input, format, "Text '" + input + "' could not be parsed");
assertJavaTimeParseException(input, format);
}

private void assertJodaParseException(String input, String format, String expectedMessage) {
@@ -666,9 +675,10 @@ private void assertJodaParseException(String input, String format, String expect
assertThat(e.getMessage(), containsString(expectedMessage));
}

private void assertJavaTimeParseException(String input, String format, String expectedMessage) {
DateFormatter javaTimeFormatter = DateFormatters.forPattern(format);
DateTimeParseException dateTimeParseException = expectThrows(DateTimeParseException.class, () -> javaTimeFormatter.parse(input));
assertThat(dateTimeParseException.getMessage(), startsWith(expectedMessage));
private void assertJavaTimeParseException(String input, String format) {
DateFormatter javaTimeFormatter = DateFormatter.forPattern("8"+format);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> javaTimeFormatter.parse(input));
assertThat(e.getMessage(), containsString(input));
assertThat(e.getMessage(), containsString(format));
}
}
Original file line number Diff line number Diff line change
@@ -25,7 +25,6 @@
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.util.Locale;
@@ -65,11 +64,13 @@ public void testEpochMillisParser() {

public void testEpochMilliParser() {
DateFormatter formatter = DateFormatter.forPattern("8epoch_millis");
DateTimeParseException e = expectThrows(DateTimeParseException.class, () -> formatter.parse("invalid"));
assertThat(e.getMessage(), containsString("could not be parsed"));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> formatter.parse("invalid"));
assertThat(e.getMessage(), containsString("invalid"));
assertThat(e.getMessage(), containsString("8epoch_millis"));

e = expectThrows(DateTimeParseException.class, () -> formatter.parse("123.1234567"));
assertThat(e.getMessage(), containsString("unparsed text found"));
e = expectThrows(IllegalArgumentException.class, () -> formatter.parse("123.1234567"));
assertThat(e.getMessage(), containsString("123.1234567"));
assertThat(e.getMessage(), containsString("8epoch_millis"));
}

// this is not in the duelling tests, because the epoch second parser in joda time drops the milliseconds after the comma
@@ -88,15 +89,17 @@ public void testEpochSecondParserWithFraction() {
assertThat(instant.getEpochSecond(), is(1234L));
assertThat(instant.getNano(), is(0));

DateTimeParseException e = expectThrows(DateTimeParseException.class, () -> formatter.parse("abc"));
assertThat(e.getMessage(), is("Text 'abc' could not be parsed, unparsed text found at index 0"));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> formatter.parse("abc"));
assertThat(e.getMessage(), containsString("abc"));
assertThat(e.getMessage(), containsString("epoch_second"));

e = expectThrows(DateTimeParseException.class, () -> formatter.parse("1234.abc"));
assertThat(e.getMessage(), is("Text '1234.abc' could not be parsed, unparsed text found at index 5"));
e = expectThrows(IllegalArgumentException.class, () -> formatter.parse("1234.abc"));
assertThat(e.getMessage(), containsString("1234.abc"));
assertThat(e.getMessage(), containsString("epoch_second"));

e = expectThrows(DateTimeParseException.class, () -> formatter.parse("1234.1234567890"));
assertThat(e.getMessage(), is("Text '1234.1234567890' could not be parsed, unparsed text found at index 14"));
}
e = expectThrows(IllegalArgumentException.class, () -> formatter.parse("1234.1234567890"));
assertThat(e.getMessage(), containsString("1234.1234567890"));
assertThat(e.getMessage(), containsString("epoch_second")); }

public void testEpochMilliParsersWithDifferentFormatters() {
DateFormatter formatter = DateFormatter.forPattern("8strict_date_optional_time||epoch_millis");
Original file line number Diff line number Diff line change
@@ -261,7 +261,8 @@ public void testIllegalMathFormat() {

public void testIllegalDateFormat() {
assertParseException("Expected bad timestamp exception", Long.toString(Long.MAX_VALUE) + "0", "failed to parse date field");
assertParseException("Expected bad date format exception", "123bogus", "could not be parsed, unparsed text found at index 3");
assertParseException("Expected bad date format exception", "123bogus",
"failed to parse date field [123bogus] with format [8dateOptionalTime||epoch_millis]");
}

public void testOnlyCallsNowIfNecessary() {

0 comments on commit a154751

Please sign in to comment.