From a9ef08dc67f353ee8dae71c6be355530ee6304d3 Mon Sep 17 00:00:00 2001 From: Jordan Wilson <37088125+jordanw-bq@users.noreply.github.com> Date: Fri, 28 Feb 2020 10:17:30 -0800 Subject: [PATCH] Report date data as a standardized format (#367) * expose date fields using a standardized date format * move date format logic to new class * add tests for DateTimeFormatter * added some negative tests * style fixes * testing locale fixes * addressed code review comments and build failures * post-merge fix * additional fixes * get CAST alias info from result set class, rather than cluster state * remove unused import * remove unused import * reduce duplication & reference enum value * setting default timezone to UTC while parsing date values * add support for custom & multiple formats * add case for Kibana flights date data with T but no time field (cherry picked from commit 7c57d72ad22b1df39b5f89838a41fbf2e8a76829) --- build.gradle | 1 + .../sql/esdomain/LocalClusterState.java | 1 - .../executor/format/DateFieldFormatter.java | 195 +++++ .../sql/executor/format/DateFormat.java | 134 ++++ .../sql/executor/format/Protocol.java | 2 +- .../sql/executor/format/SelectResultSet.java | 32 +- .../sql/esintgtest/SQLFunctionsIT.java | 12 +- .../format/DateFieldFormatterTest.java | 693 ++++++++++++++++++ 8 files changed, 1061 insertions(+), 9 deletions(-) create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatter.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFormat.java create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatterTest.java diff --git a/build.gradle b/build.gradle index 6c5d2955f7..d76a9a1df8 100644 --- a/build.gradle +++ b/build.gradle @@ -223,6 +223,7 @@ dependencies { compile group: "org.elasticsearch.plugin", name: 'reindex-client', version: "${es_version}" compile group: 'com.google.guava', name: 'guava', version:'15.0' compile group: 'org.json', name: 'json', version:'20180813' + compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.9' // ANTLR gradle plugin and runtime dependency antlr "org.antlr:antlr4:4.7.1" diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/esdomain/LocalClusterState.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/esdomain/LocalClusterState.java index 190c827c06..2d33ea53e2 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/sql/esdomain/LocalClusterState.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/esdomain/LocalClusterState.java @@ -91,7 +91,6 @@ public class LocalClusterState { */ private final Map latestSettings = new ConcurrentHashMap<>(); - public static synchronized LocalClusterState state() { if (INSTANCE == null) { INSTANCE = new LocalClusterState(); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatter.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatter.java new file mode 100644 index 0000000000..f05d340bad --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatter.java @@ -0,0 +1,195 @@ +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.sql.executor.format; + +import com.amazon.opendistroforelasticsearch.sql.esdomain.LocalClusterState; +import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.FieldMappings; +import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.TypeMappings; +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang3.time.DateUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.text.ParseException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.stream.Collectors; + +/** + * Formatter to transform date fields into a consistent format for consumption by clients. + */ +public class DateFieldFormatter { + private static final Logger LOG = LogManager.getLogger(DateFieldFormatter.class); + private static final String FORMAT_JDBC = "yyyy-MM-dd HH:mm:ss.SSS"; + private static final String FORMAT_DELIMITER = "\\|\\|"; + + private static final String FORMAT_DOT_DATE_AND_TIME = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"; + private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_LOGS_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; + private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ss"; + private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION_NO_TIME = "yyyy-MM-dd'T'"; + private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_ECOMMERCE_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ssXXX"; + private static final String FORMAT_DOT_DATE = DateFormat.getFormatString("date"); + + private final Map> dateFieldFormatMap; + private final Map fieldAliasMap; + private Set dateColumns; + + public DateFieldFormatter(String indexName, List columns, Map fieldAliasMap) { + this.dateFieldFormatMap = getDateFieldFormatMap(indexName); + this.dateColumns = getDateColumns(columns); + this.fieldAliasMap = fieldAliasMap; + } + + @VisibleForTesting + protected DateFieldFormatter(Map> dateFieldFormatMap, + List columns, + Map fieldAliasMap) { + this.dateFieldFormatMap = dateFieldFormatMap; + this.dateColumns = getDateColumns(columns); + this.fieldAliasMap = fieldAliasMap; + } + + /** + * Apply the JDBC date format ({@code yyyy-MM-dd HH:mm:ss.SSS}) to date values in the current row. + * + * @param rowSource The row in which to format the date values. + */ + public void applyJDBCDateFormat(Map rowSource) { + for (String columnName : dateColumns) { + Object columnOriginalDate = rowSource.get(columnName); + if (columnOriginalDate == null) { + // Don't try to parse null date values + continue; + } + + List formats = getFormatsForColumn(columnName); + if (formats == null) { + LOG.warn("Could not determine date formats for column {}; returning original value", columnName); + continue; + } + + Date date = parseDateString(formats, columnOriginalDate.toString()); + if (date != null) { + rowSource.put(columnName, DateFormat.getFormattedDate(date, FORMAT_JDBC)); + break; + } else { + LOG.warn("Could not parse date value; returning original value"); + } + } + } + + private List getFormatsForColumn(String columnName) { + // Handle special cases for column names + if (fieldAliasMap.get(columnName) != null) { + // Column was aliased, and we need to find the base name for the column + columnName = fieldAliasMap.get(columnName); + } else if (columnName.split("\\.").length == 2) { + // Column is part of a join, and is qualified by the table alias + columnName = columnName.split("\\.")[1]; + } + return dateFieldFormatMap.get(columnName); + } + + private Set getDateColumns(List columns) { + return columns.stream() + .filter(column -> column.getType().equals(Schema.Type.DATE.nameLowerCase())) + .map(Schema.Column::getName) + .collect(Collectors.toSet()); + } + + private Map> getDateFieldFormatMap(String indexName) { + LocalClusterState state = LocalClusterState.state(); + Map> formatMap = new HashMap<>(); + + String[] indices = indexName.split("\\|"); + Collection typeProperties = state.getFieldMappings(indices) + .allMappings(); + + for (TypeMappings mappings: typeProperties) { + FieldMappings fieldMappings = mappings.firstMapping(); + for (Map.Entry> field : fieldMappings.data().entrySet()) { + String fieldName = field.getKey(); + Map properties = field.getValue(); + + if (properties.containsKey("format")) { + formatMap.put(fieldName, getFormatsFromProperties(properties.get("format").toString())); + } else { + // Give all field types a format, since operations such as casts + // can change the output type for a field to `date`. + formatMap.put(fieldName, getFormatsFromProperties("date_optional_time")); + } + } + } + + return formatMap; + } + + private List getFormatsFromProperties(String formatProperty) { + String[] formats = formatProperty.split(FORMAT_DELIMITER); + return Arrays.asList(formats); + } + + private Date parseDateString(List formats, String columnOriginalDate) { + TimeZone originalDefaultTimeZone = TimeZone.getDefault(); + Date parsedDate = null; + + // Apache Commons DateUtils uses the default TimeZone for the JVM when parsing. + // However, since all dates on Elasticsearch are stored as UTC, we need to + // parse these values using the UTC timezone. + TimeZone.setDefault(TimeZone.getTimeZone("UTC")); + for (String columnFormat : formats) { + try { + switch (columnFormat) { + case "date_optional_time": + parsedDate = DateUtils.parseDate( + columnOriginalDate, + FORMAT_DOT_KIBANA_SAMPLE_DATA_LOGS_EXCEPTION, + FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION, + FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION_NO_TIME, + FORMAT_DOT_KIBANA_SAMPLE_DATA_ECOMMERCE_EXCEPTION, + FORMAT_DOT_DATE_AND_TIME, + FORMAT_DOT_DATE); + break; + case "epoch_millis": + parsedDate = new Date(Long.parseLong(columnOriginalDate)); + break; + case "epoch_second": + parsedDate = new Date(Long.parseLong(columnOriginalDate) * 1000); + break; + default: + String formatString = DateFormat.getFormatString(columnFormat); + if (formatString == null) { + // Custom format; take as-is + formatString = columnFormat; + } + parsedDate = DateUtils.parseDate(columnOriginalDate, formatString); + } + } catch (ParseException | NumberFormatException e) { + LOG.warn(String.format("Could not parse date string %s as %s", columnOriginalDate, columnFormat)); + } + } + // Reset default timezone after parsing + TimeZone.setDefault(originalDefaultTimeZone); + + return parsedDate; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFormat.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFormat.java new file mode 100644 index 0000000000..c3baa0d912 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFormat.java @@ -0,0 +1,134 @@ +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.sql.executor.format; + +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.util.HashMap; +import java.util.Map; + +public class DateFormat { + + private static Map formatMap = new HashMap<>(); + + static { + // Special cases that are parsed separately + formatMap.put("date_optional_time", ""); + formatMap.put("epoch_millis", ""); + formatMap.put("epoch_second", ""); + + formatMap.put("basic_date", Date.BASIC_DATE); + formatMap.put("basic_date_time", Date.BASIC_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ); + formatMap.put("basic_date_time_no_millis", Date.BASIC_DATE + Time.T + Time.BASIC_TIME + Time.TZ); + + formatMap.put("basic_ordinal_date", Date.BASIC_ORDINAL_DATE); + formatMap.put("basic_ordinal_date_time", + Date.BASIC_ORDINAL_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ); + formatMap.put("basic_ordinal_date_time_no_millis", Date.BASIC_ORDINAL_DATE+ Time.T + Time.BASIC_TIME + Time.TZ); + + formatMap.put("basic_time", Time.BASIC_TIME + Time.MILLIS + Time.TZ); + formatMap.put("basic_time_no_millis", Time.BASIC_TIME + Time.TZ); + + formatMap.put("basic_t_time", Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ); + formatMap.put("basic_t_time_no_millis", Time.T + Time.BASIC_TIME + Time.TZ); + + formatMap.put("basic_week_date", Date.BASIC_WEEK_DATE); + formatMap.put("basic_week_date_time", Date.BASIC_WEEK_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ); + formatMap.put("basic_week_date_time_no_millis", Date.BASIC_WEEK_DATE + Time.T + Time.BASIC_TIME + Time.TZ); + + formatMap.put("date", Date.DATE); + formatMap.put("date_hour", Date.DATE + Time.T + Time.HOUR); + formatMap.put("date_hour_minute", Date.DATE + Time.T + Time.HOUR_MINUTE); + formatMap.put("date_hour_minute_second", Date.DATE + Time.T + Time.TIME); + formatMap.put("date_hour_minute_second_fraction", Date.DATE + Time.T + Time.TIME + Time.MILLIS); + formatMap.put("date_hour_minute_second_millis", Date.DATE + Time.T + Time.TIME + Time.MILLIS); + formatMap.put("date_time", Date.DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ); + formatMap.put("date_time_no_millis", Date.DATE + Time.T + Time.TIME + Time.TZZ); + + formatMap.put("hour", Time.HOUR); + formatMap.put("hour_minute", Time.HOUR_MINUTE); + formatMap.put("hour_minute_second", Time.TIME); + formatMap.put("hour_minute_second_fraction", Time.TIME + Time.MILLIS); + formatMap.put("hour_minute_second_millis", Time.TIME + Time.MILLIS); + + formatMap.put("ordinal_date", Date.ORDINAL_DATE); + formatMap.put("ordinal_date_time", Date.ORDINAL_DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ); + formatMap.put("ordinal_date_time_no_millis", Date.ORDINAL_DATE + Time.T + Time.TIME + Time.TZZ); + + formatMap.put("time", Time.TIME + Time.MILLIS + Time.TZZ); + formatMap.put("time_no_millis", Time.TIME + Time.TZZ); + + formatMap.put("t_time", Time.T + Time.TIME + Time.MILLIS + Time.TZZ); + formatMap.put("t_time_no_millis", Time.T + Time.TIME + Time.TZZ); + + formatMap.put("week_date", Date.WEEK_DATE); + formatMap.put("week_date_time", Date.WEEK_DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ); + formatMap.put("week_date_time_no_millis", Date.WEEK_DATE + Time.T + Time.TIME + Time.TZZ); + + // Note: input mapping is "weekyear", but output value is "week_year" + formatMap.put("week_year", Date.WEEKYEAR); + formatMap.put("weekyear_week", Date.WEEKYEAR_WEEK); + formatMap.put("weekyear_week_day", Date.WEEK_DATE); + + formatMap.put("year", Date.YEAR); + formatMap.put("year_month", Date.YEAR_MONTH); + formatMap.put("year_month_day", Date.DATE); + } + + private DateFormat() { + } + + public static String getFormatString(String formatName) { + return formatMap.get(formatName); + } + + public static String getFormattedDate(java.util.Date date, String dateFormat) { + Instant instant = date.toInstant(); + ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of("Etc/UTC")); + return zdt.format(DateTimeFormatter.ofPattern(dateFormat)); + } + + private static class Date { + static String BASIC_DATE = "yyyyMMdd"; + static String BASIC_ORDINAL_DATE = "yyyyDDD"; + static String BASIC_WEEK_DATE = "YYYY'W'wwu"; + + static String DATE = "yyyy-MM-dd"; + static String ORDINAL_DATE = "yyyy-DDD"; + + static String YEAR = "yyyy"; + static String YEAR_MONTH = "yyyy-MM"; + + static String WEEK_DATE = "YYYY-'W'ww-u"; + static String WEEKYEAR = "YYYY"; + static String WEEKYEAR_WEEK = "YYYY-'W'ww"; + } + + private static class Time { + static String T = "'T'"; + static String BASIC_TIME = "HHmmss"; + static String TIME = "HH:mm:ss"; + + static String HOUR = "HH"; + static String HOUR_MINUTE = "HH:mm"; + + static String MILLIS = ".SSS"; + static String TZ = "Z"; + static String TZZ = "XX"; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/Protocol.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/Protocol.java index 827f2ae9e2..7dffb0baaa 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/Protocol.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/Protocol.java @@ -80,7 +80,7 @@ private ResultSet loadResultSet(Client client, QueryStatement queryStatement, Ob if (queryStatement instanceof Delete) { return new DeleteResultSet(client, (Delete) queryStatement, queryResult); } else if (queryStatement instanceof Query) { - return new SelectResultSet(client, (Query) queryStatement, queryResult, scriptColumnType); + return new SelectResultSet(client, (Query) queryStatement, queryResult, scriptColumnType, formatType); } else if (queryStatement instanceof IndexStatement) { IndexStatement statement = (IndexStatement) queryStatement; StatementType statementType = statement.getStatementType(); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/SelectResultSet.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/SelectResultSet.java index 7e2e2e9239..a94aed514c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/SelectResultSet.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/executor/format/SelectResultSet.java @@ -15,7 +15,10 @@ package com.amazon.opendistroforelasticsearch.sql.executor.format; +import com.alibaba.druid.sql.ast.SQLExpr; import com.alibaba.druid.sql.ast.expr.SQLCaseExpr; +import com.alibaba.druid.sql.ast.expr.SQLCastExpr; +import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr; import com.amazon.opendistroforelasticsearch.sql.domain.ColumnTypeProvider; import com.amazon.opendistroforelasticsearch.sql.domain.Field; import com.amazon.opendistroforelasticsearch.sql.domain.JoinSelect; @@ -25,6 +28,7 @@ import com.amazon.opendistroforelasticsearch.sql.domain.TableOnJoinSelect; import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.FieldMapping; import com.amazon.opendistroforelasticsearch.sql.exception.SqlFeatureNotImplementedException; +import com.amazon.opendistroforelasticsearch.sql.executor.Format; import com.amazon.opendistroforelasticsearch.sql.utils.SQLFunctions; import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsRequest; import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsResponse; @@ -58,6 +62,7 @@ public class SelectResultSet extends ResultSet { public static final String SCORE = "_score"; + private final String formatType; private Query query; private Object queryResult; @@ -73,11 +78,20 @@ public class SelectResultSet extends ResultSet { private long totalHits; private List rows; - public SelectResultSet(Client client, Query query, Object queryResult, ColumnTypeProvider outputColumnType) { + private DateFieldFormatter dateFieldFormatter; + // alias -> base field name + private Map fieldAliasMap = new HashMap<>(); + + public SelectResultSet(Client client, + Query query, + Object queryResult, + ColumnTypeProvider outputColumnType, + String formatType) { this.client = client; this.query = query; this.queryResult = queryResult; this.selectAll = false; + this.formatType = formatType; this.outputColumnType = outputColumnType; if (isJoinQuery()) { @@ -89,6 +103,7 @@ public SelectResultSet(Client client, Query query, Object queryResult, ColumnTyp } this.schema = new Schema(indexName, typeName, columns); this.head = schema.getHeaders(); + this.dateFieldFormatter = new DateFieldFormatter(indexName, columns, fieldAliasMap); extractData(); this.dataRows = new DataRows(size, totalHits, rows); @@ -380,6 +395,15 @@ private List populateColumns(Query query, String[] fieldNames, Ma if (fieldMap.get(fieldName) instanceof MethodField) { MethodField methodField = (MethodField) fieldMap.get(fieldName); int fieldIndex = fieldNameList.indexOf(fieldName); + + SQLExpr expr = methodField.getExpression(); + if (expr instanceof SQLCastExpr) { + // Since CAST expressions create an alias for a field, we need to save the original field name + // for this alias for formatting data later. + SQLIdentifierExpr castFieldIdentifier = (SQLIdentifierExpr) ((SQLCastExpr) expr).getExpr(); + fieldAliasMap.put(methodField.getAlias(), castFieldIdentifier.getName()); + } + columns.add( new Schema.Column( methodField.getAlias(), @@ -530,8 +554,14 @@ private List populateRows(SearchHits searchHits) { for (Map.Entry field : hit.getFields().entrySet()) { rowSource.put(field.getKey(), field.getValue().getValue()); } + if (formatType.equalsIgnoreCase(Format.JDBC.getFormatName())) { + dateFieldFormatter.applyJDBCDateFormat(rowSource); + } result = flatNestedField(newKeys, rowSource, hit.getInnerHits()); } else { + if (formatType.equalsIgnoreCase(Format.JDBC.getFormatName())) { + dateFieldFormatter.applyJDBCDateFormat(rowSource); + } result = new ArrayList<>(); result.add(new DataRows.Row(rowSource)); } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/sql/esintgtest/SQLFunctionsIT.java b/src/test/java/com/amazon/opendistroforelasticsearch/sql/esintgtest/SQLFunctionsIT.java index 80e61cb381..d639385527 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/sql/esintgtest/SQLFunctionsIT.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/sql/esintgtest/SQLFunctionsIT.java @@ -331,8 +331,8 @@ public void castKeywordFieldToDatetimeWithoutAliasJdbcFormatTest() { verifySchema(response, schema("cast_date_keyword", null, "date")); verifyDataRows(response, - rows("2014-08-19T07:09:13.434Z"), - rows("2019-09-25T02:04:13.469Z")); + rows("2014-08-19 07:09:13.434"), + rows("2019-09-25 02:04:13.469")); } @Test @@ -343,8 +343,8 @@ public void castKeywordFieldToDatetimeWithAliasJdbcFormatTest() { verifySchema(response, schema("test_alias", null, "date")); verifyDataRows(response, - rows("2014-08-19T07:09:13.434Z"), - rows("2019-09-25T02:04:13.469Z")); + rows("2014-08-19 07:09:13.434"), + rows("2019-09-25 02:04:13.469")); } @Test @@ -355,8 +355,8 @@ public void castFieldToDatetimeWithWhereClauseJdbcFormatTest() { verifySchema(response, schema("cast_date_keyword", null, "date")); verifyDataRows(response, - rows("2014-08-19T07:09:13.434Z"), - rows("2019-09-25T02:04:13.469Z")); + rows("2014-08-19 07:09:13.434"), + rows("2019-09-25 02:04:13.469")); } @Test diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatterTest.java b/src/test/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatterTest.java new file mode 100644 index 0000000000..b08dc21959 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/sql/executor/format/DateFieldFormatterTest.java @@ -0,0 +1,693 @@ +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.sql.executor.format; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.*; + +public class DateFieldFormatterTest { + + @Test + public void testKibanaSampleDataEcommerceOrderDateField() + { + String columnName = "order_date"; + String dateFormat = "date_optional_time"; + String originalDateValue = "2020-02-24T09:28:48+00:00"; + String expectedDateValue = "2020-02-24 09:28:48.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testKibanaSampleDataFlightsTimestampField() + { + String columnName = "timestamp"; + String dateFormat = "date_optional_time"; + String originalDateValue = "2020-02-03T00:00:00"; + String expectedDateValue = "2020-02-03 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testKibanaSampleDataFlightsTimestampFieldNoTime() + { + String columnName = "timestamp"; + String dateFormat = "date_optional_time"; + String originalDateValue = "2020-02-03T"; + String expectedDateValue = "2020-02-03 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testKibanaSampleDataLogsUtcDateField() + { + String columnName = "utc_date"; + String dateFormat = "date_optional_time"; + String originalDateValue = "2020-02-02T00:39:02.912Z"; + String expectedDateValue = "2020-02-02 00:39:02.912"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testEpochMillis() + { + String columnName = "date_field"; + String dateFormat = "epoch_millis"; + String originalDateValue = "727430805000"; + String expectedDateValue = "1993-01-19 08:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testEpochSecond() + { + String columnName = "date_field"; + String dateFormat = "epoch_second"; + String originalDateValue = "727430805"; + String expectedDateValue = "1993-01-19 08:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateOptionalTimeDateOnly() + { + String columnName = "date_field"; + String dateFormat = "date_optional_time"; + String originalDateValue = "1993-01-19"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateOptionalTimeDateAndTime() + { + String columnName = "date_field"; + String dateFormat = "date_optional_time"; + String originalDateValue = "1993-01-19T00:06:45.123-0800"; + String expectedDateValue = "1993-01-19 08:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicDate() + { + String columnName = "date_field"; + String dateFormat = "basic_date"; + String originalDateValue = "19930119"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicDateTime() + { + String columnName = "date_field"; + String dateFormat = "basic_date_time"; + String originalDateValue = "19930119T120645.123-0800"; + String expectedDateValue = "1993-01-19 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicDateTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "basic_date_time_no_millis"; + String originalDateValue = "19930119T120645-0800"; + String expectedDateValue = "1993-01-19 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicOrdinalDate() + { + String columnName = "date_field"; + String dateFormat = "basic_ordinal_date"; + String originalDateValue = "1993019"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicOrdinalDateTime() + { + String columnName = "date_field"; + String dateFormat = "basic_ordinal_date_time"; + String originalDateValue = "1993019T120645.123-0800"; + String expectedDateValue = "1993-01-19 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicOrdinalDateTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "basic_ordinal_date_time_no_millis"; + String originalDateValue = "1993019T120645-0800"; + String expectedDateValue = "1993-01-19 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicTime() + { + String columnName = "date_field"; + String dateFormat = "basic_time"; + String originalDateValue = "120645.123-0800"; + String expectedDateValue = "1970-01-01 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "basic_time_no_millis"; + String originalDateValue = "120645-0800"; + String expectedDateValue = "1970-01-01 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicTTime() + { + String columnName = "date_field"; + String dateFormat = "basic_t_time"; + String originalDateValue = "T120645.123-0800"; + String expectedDateValue = "1970-01-01 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicTTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "basic_t_time_no_millis"; + String originalDateValue = "T120645-0800"; + String expectedDateValue = "1970-01-01 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicWeekDate() + { + String columnName = "date_field"; + String dateFormat = "basic_week_date"; + String originalDateValue = "1993W042"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicWeekDateTime() + { + String columnName = "date_field"; + String dateFormat = "basic_week_date_time"; + String originalDateValue = "1993W042T120645.123-0800"; + String expectedDateValue = "1993-01-19 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testBasicWeekDateTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "basic_week_date_time_no_millis"; + String originalDateValue = "1993W042T120645-0800"; + String expectedDateValue = "1993-01-19 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDate() + { + String columnName = "date_field"; + String dateFormat = "date"; + String originalDateValue = "1993-01-19"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateHour() + { + String columnName = "date_field"; + String dateFormat = "date_hour"; + String originalDateValue = "1993-01-19T12"; + String expectedDateValue = "1993-01-19 12:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateHourMinute() + { + String columnName = "date_field"; + String dateFormat = "date_hour_minute"; + String originalDateValue = "1993-01-19T12:06"; + String expectedDateValue = "1993-01-19 12:06:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateHourMinuteSecond() + { + String columnName = "date_field"; + String dateFormat = "date_hour_minute_second"; + String originalDateValue = "1993-01-19T12:06:45"; + String expectedDateValue = "1993-01-19 12:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateHourMinuteSecondFraction() + { + String columnName = "date_field"; + String dateFormat = "date_hour_minute_second_fraction"; + String originalDateValue = "1993-01-19T12:06:45.123"; + String expectedDateValue = "1993-01-19 12:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateHourMinuteSecondMillis() + { + String columnName = "date_field"; + String dateFormat = "date_hour_minute_second_millis"; + String originalDateValue = "1993-01-19T12:06:45.123"; + String expectedDateValue = "1993-01-19 12:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateTime() + { + String columnName = "date_field"; + String dateFormat = "date_time"; + String originalDateValue = "1993-01-19T12:06:45.123-0800"; + String expectedDateValue = "1993-01-19 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "date_time_no_millis"; + String originalDateValue = "1993-01-19T12:06:45-0800"; + String expectedDateValue = "1993-01-19 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testHour() + { + String columnName = "date_field"; + String dateFormat = "hour"; + String originalDateValue = "12"; + String expectedDateValue = "1970-01-01 12:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testHourMinute() + { + String columnName = "date_field"; + String dateFormat = "hour_minute"; + String originalDateValue = "12:06"; + String expectedDateValue = "1970-01-01 12:06:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testHourMinuteSecond() + { + String columnName = "date_field"; + String dateFormat = "hour_minute_second"; + String originalDateValue = "12:06:45"; + String expectedDateValue = "1970-01-01 12:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testHourMinuteSecondFraction() + { + String columnName = "date_field"; + String dateFormat = "hour_minute_second_fraction"; + String originalDateValue = "12:06:45.123"; + String expectedDateValue = "1970-01-01 12:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testHourMinuteSecondMillis() + { + String columnName = "date_field"; + String dateFormat = "hour_minute_second_millis"; + String originalDateValue = "12:06:45.123"; + String expectedDateValue = "1970-01-01 12:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testOrdinalDate() + { + String columnName = "date_field"; + String dateFormat = "ordinal_date"; + String originalDateValue = "1993-019"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testOrdinalDateTime() + { + String columnName = "date_field"; + String dateFormat = "ordinal_date_time"; + String originalDateValue = "1993-019T12:06:45.123-0800"; + String expectedDateValue = "1993-01-19 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testOrdinalDateTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "ordinal_date_time_no_millis"; + String originalDateValue = "1993-019T12:06:45-0800"; + String expectedDateValue = "1993-01-19 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testTime() + { + String columnName = "date_field"; + String dateFormat = "time"; + String originalDateValue = "12:06:45.123-0800"; + String expectedDateValue = "1970-01-01 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "time_no_millis"; + String originalDateValue = "12:06:45-0800"; + String expectedDateValue = "1970-01-01 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testTTime() + { + String columnName = "date_field"; + String dateFormat = "t_time"; + String originalDateValue = "T12:06:45.123-0800"; + String expectedDateValue = "1970-01-01 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testTTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "t_time_no_millis"; + String originalDateValue = "T12:06:45-0800"; + String expectedDateValue = "1970-01-01 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testWeekDate() + { + String columnName = "date_field"; + String dateFormat = "week_date"; + String originalDateValue = "1993-W04-2"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testWeekDateTime() + { + String columnName = "date_field"; + String dateFormat = "week_date_time"; + String originalDateValue = "1993-W04-2T12:06:45.123-0800"; + String expectedDateValue = "1993-01-19 20:06:45.123"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testWeekDateTimeNoMillis() + { + String columnName = "date_field"; + String dateFormat = "week_date_time_no_millis"; + String originalDateValue = "1993-W04-2T12:06:45-0800"; + String expectedDateValue = "1993-01-19 20:06:45.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testWeekyear() + { + String columnName = "date_field"; + String dateFormat = "week_year"; + String originalDateValue = "1993"; + String expectedDateValue = "1993-01-01 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testWeekyearWeek() + { + String columnName = "date_field"; + String dateFormat = "weekyear_week"; + String originalDateValue = "1993-W04"; + String expectedDateValue = "1993-01-17 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testWeekyearWeekDay() + { + String columnName = "date_field"; + String dateFormat = "weekyear_week_day"; + String originalDateValue = "1993-W04-2"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testYear() + { + String columnName = "date_field"; + String dateFormat = "year"; + String originalDateValue = "1993"; + String expectedDateValue = "1993-01-01 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testYearMonth() + { + String columnName = "date_field"; + String dateFormat = "year_month"; + String originalDateValue = "1993-01"; + String expectedDateValue = "1993-01-01 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testYearMonthDay() + { + String columnName = "date_field"; + String dateFormat = "year_month_day"; + String originalDateValue = "1993-01-19"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testCustomFormat() + { + String columnName = "date_field"; + String dateFormat = "EEE, MMM d, ''yy"; + + String originalDateValue = "Tue, Jan 19, '93"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testMultipleFormats() + { + String columnName = "date_field"; + String dateFormat = "date_optional_time||epoch_millis"; + + String originalDateValue = "1993-01-19"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + + originalDateValue = "727401600000"; + expectedDateValue = "1993-01-19 00:00:00.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testMultipleCustomFormats() + { + String columnName = "date_field"; + String dateFormat = "EEE, MMM d, ''yy||yyMMddHHmmssZ"; + + String originalDateValue = "Tue, Jan 19, '93"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + + originalDateValue = "930119000000-0000"; + expectedDateValue = "1993-01-19 00:00:00.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testNamedAndCustomFormats() + { + String columnName = "date_field"; + String dateFormat = "EEE, MMM d, ''yy||hour_minute_second"; + + String originalDateValue = "Tue, Jan 19, '93"; + String expectedDateValue = "1993-01-19 00:00:00.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + + originalDateValue = "12:06:45"; + expectedDateValue = "1970-01-01 12:06:45.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testIncorrectFormat() + { + String columnName = "date_field"; + String dateFormat = "date_optional_time"; + String originalDateValue = "1581724085"; + // Invalid format for date value; should return original value + String expectedDateValue = "1581724085"; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testNullDateData() + { + String columnName = "date_field"; + String dateFormat = "date_optional_time"; + String originalDateValue = null; + // Nulls should be preserved + String expectedDateValue = null; + + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + private void verifyFormatting(String columnName, String dateFormatProperty, String originalDateValue, String expectedDateValue) + { + List columns = buildColumnList(columnName); + Map> dateFieldFormatMap = buildDateFieldFormatMap(columnName, dateFormatProperty); + + Map rowSource = new HashMap<>(); + rowSource.put(columnName, originalDateValue); + + DateFieldFormatter dateFieldFormatter = new DateFieldFormatter(dateFieldFormatMap, columns, new HashMap<>()); + executeFormattingAndCompare(dateFieldFormatter, rowSource, columnName, expectedDateValue); + } + + private void executeFormattingAndCompare( + DateFieldFormatter formatter, + Map rowSource, + String columnToCheck, + String expectedDateValue) { + formatter.applyJDBCDateFormat(rowSource); + assertEquals(expectedDateValue, rowSource.get(columnToCheck)); + } + + private List buildColumnList(String columnName) { + return ImmutableList.builder() + .add(new Schema.Column(columnName, null, Schema.Type.DATE)) + .build(); + } + + private Map> buildDateFieldFormatMap(String columnName, String dateFormatProperty) { + return ImmutableMap.>builder() + .put(columnName, Arrays.asList(dateFormatProperty.split("\\|\\|"))) + .build(); + } +} \ No newline at end of file