Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Report date data as a standardized format #367

Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ dependencies {
compile group: "org.elasticsearch.plugin", name: 'reindex-client', version: "${es_version}"
compile group: 'com.google.guava', name: 'guava', version:'15.0'
compile group: 'org.json', name: 'json', version:'20180813'
compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.9'

// ANTLR gradle plugin and runtime dependency
antlr "org.antlr:antlr4:4.7.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ public class LocalClusterState {
*/
private final Map<String, Object> latestSettings = new ConcurrentHashMap<>();


public static synchronized LocalClusterState state() {
if (INSTANCE == null) {
INSTANCE = new LocalClusterState();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.executor.format;

import com.amazon.opendistroforelasticsearch.sql.esdomain.LocalClusterState;
import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.FieldMappings;
import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.TypeMappings;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Formatter to transform date fields into a consistent format for consumption by clients.
*/
public class DateFieldFormatter {
dai-chen marked this conversation as resolved.
Show resolved Hide resolved
private static final Logger LOG = LogManager.getLogger(DateFieldFormatter.class);
private static final String FORMAT_JDBC = "yyyy-MM-dd HH:mm:ss.SSS";

private static final String FORMAT_DOT_DATE_AND_TIME = "yyyy-MM-dd'T'HH:mm:ss.SSSZ";
private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_LOGS_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ss";
private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_ECOMMERCE_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ssXXX";
private static final String FORMAT_DOT_DATE = "yyyy-MM-dd";
penghuo marked this conversation as resolved.
Show resolved Hide resolved

private final Map<String, String> dateFieldFormatMap;
private final Map<String, String> fieldAliasMap;
private Set<String> dateColumns;

public DateFieldFormatter(String indexName, List<Schema.Column> columns, Map<String, String> fieldAliasMap) {
this.dateFieldFormatMap = getDateFieldFormatMap(indexName);
this.dateColumns = getDateColumns(columns);
this.fieldAliasMap = fieldAliasMap;
}

@VisibleForTesting
protected DateFieldFormatter(Map<String, String> dateFieldFormatMap,
List<Schema.Column> columns,
Map<String, String> fieldAliasMap) {
this.dateFieldFormatMap = dateFieldFormatMap;
this.dateColumns = getDateColumns(columns);
this.fieldAliasMap = fieldAliasMap;
}

/**
* Apply the JDBC date format ({@code yyyy-MM-dd HH:mm:ss.SSS}) to date values in the current row.
*
* @param rowSource The row in which to format the date values.
*/
public void applyJDBCDateFormat(Map<String, Object> rowSource) {
for (String columnName : dateColumns) {
Object columnOriginalDate = rowSource.get(columnName);
if (columnOriginalDate == null) {
// Don't try to parse null date values
continue;
}

String columnFormat = getFormatForColumn(columnName);
if (columnFormat == null) {
LOG.warn("Could not determine date format for column {}; returning original value", columnName);
continue;
}
DateFormat format = DateFormat.valueOf(columnFormat.toUpperCase());
penghuo marked this conversation as resolved.
Show resolved Hide resolved

Date date = parseDateString(format, columnOriginalDate.toString());
if (date != null) {
rowSource.put(columnName, DateFormat.getFormattedDate(date, FORMAT_JDBC));
} else {
LOG.warn("Could not parse date value; returning original value");
}
}
}

private String getFormatForColumn(String columnName) {
// Handle special cases for column names
if (fieldAliasMap.get(columnName) != null) {
// Column was aliased, and we need to find the base name for the column
columnName = fieldAliasMap.get(columnName);
} else if (columnName.split("\\.").length == 2) {
// Column is part of a join, and is qualified by the table alias
columnName = columnName.split("\\.")[1];
}
return dateFieldFormatMap.get(columnName);
}

private Set<String> getDateColumns(List<Schema.Column> columns) {
return columns.stream()
.filter(column -> column.getType().equals(Schema.Type.DATE.nameLowerCase()))
.map(Schema.Column::getName)
.collect(Collectors.toSet());
}

private Map<String, String> getDateFieldFormatMap(String indexName) {
LocalClusterState state = LocalClusterState.state();
Map<String, String> formatMap = new HashMap<>();

String[] indices = indexName.split("\\|");
Collection<TypeMappings> typeProperties = state.getFieldMappings(indices)
.allMappings();

for (TypeMappings mappings: typeProperties) {
FieldMappings fieldMappings = mappings.firstMapping();
for (Map.Entry<String, Map<String, Object>> field : fieldMappings.data().entrySet()) {
String fieldName = field.getKey();
Map<String, Object> properties = field.getValue();

if (properties.containsKey("format")) {
penghuo marked this conversation as resolved.
Show resolved Hide resolved
formatMap.put(fieldName, properties.get("format").toString());
} else {
// Give all field types a format, since operations such as casts
// can change the output type for a field to `date`.
formatMap.put(fieldName, "date_optional_time");
}
}
}

return formatMap;
}

private Date parseDateString(DateFormat format, String columnOriginalDate) {
try {
switch (format) {
case DATE_OPTIONAL_TIME:
return DateUtils.parseDate(
columnOriginalDate,
FORMAT_DOT_KIBANA_SAMPLE_DATA_LOGS_EXCEPTION,
FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION,
FORMAT_DOT_KIBANA_SAMPLE_DATA_ECOMMERCE_EXCEPTION,
FORMAT_DOT_DATE_AND_TIME,
FORMAT_DOT_DATE);
case EPOCH_MILLIS:
return new Date(Long.parseLong(columnOriginalDate));
case EPOCH_SECOND:
return new Date(Long.parseLong(columnOriginalDate) * 1000);
default:
return DateUtils.parseDate(columnOriginalDate, format.getFormatString());
}
} catch (ParseException e) {
LOG.error(
String.format("Error parsing date string %s as %s", columnOriginalDate, format.nameLowerCase()),
e);
}
return null;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.executor.format;

import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;

public enum DateFormat {
// Special cases that are parsed separately
DATE_OPTIONAL_TIME(""),
EPOCH_MILLIS(""),
EPOCH_SECOND(""),

BASIC_DATE(Date.BASIC_DATE),
BASIC_DATE_TIME(Date.BASIC_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_DATE_TIME_NO_MILLIS(Date.BASIC_DATE + Time.T + Time.BASIC_TIME + Time.TZ),

BASIC_ORDINAL_DATE(Date.BASIC_ORDINAL_DATE),
BASIC_ORDINAL_DATE_TIME(Date.BASIC_ORDINAL_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_ORDINAL_DATE_TIME_NO_MILLIS(Date.BASIC_ORDINAL_DATE+ Time.T + Time.BASIC_TIME + Time.TZ),

BASIC_TIME(Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_TIME_NO_MILLIS(Time.BASIC_TIME + Time.TZ),

BASIC_T_TIME(Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_T_TIME_NO_MILLIS(Time.T + Time.BASIC_TIME + Time.TZ),

BASIC_WEEK_DATE(Date.BASIC_WEEK_DATE),
BASIC_WEEK_DATE_TIME(Date.BASIC_WEEK_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_WEEK_DATE_TIME_NO_MILLIS(Date.BASIC_WEEK_DATE + Time.T + Time.BASIC_TIME + Time.TZ),

DATE(Date.DATE),
DATE_HOUR(Date.DATE + Time.T + Time.HOUR),
DATE_HOUR_MINUTE(Date.DATE + Time.T + Time.HOUR_MINUTE),
DATE_HOUR_MINUTE_SECOND(Date.DATE + Time.T + Time.TIME),
DATE_HOUR_MINUTE_SECOND_FRACTION(Date.DATE + Time.T + Time.TIME + Time.MILLIS),
DATE_HOUR_MINUTE_SECOND_MILLIS(Date.DATE + Time.T + Time.TIME + Time.MILLIS),
DATE_TIME(Date.DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
DATE_TIME_NO_MILLIS(Date.DATE + Time.T + Time.TIME + Time.TZZ),

HOUR(Time.HOUR),
HOUR_MINUTE(Time.HOUR_MINUTE),
HOUR_MINUTE_SECOND(Time.TIME),
HOUR_MINUTE_SECOND_FRACTION(Time.TIME + Time.MILLIS),
HOUR_MINUTE_SECOND_MILLIS(Time.TIME + Time.MILLIS),

ORDINAL_DATE(Date.ORDINAL_DATE),
ORDINAL_DATE_TIME(Date.ORDINAL_DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
ORDINAL_DATE_TIME_NO_MILLIS(Date.ORDINAL_DATE + Time.T + Time.TIME + Time.TZZ),

TIME(Time.TIME + Time.MILLIS + Time.TZZ),
TIME_NO_MILLIS(Time.TIME + Time.TZZ),

T_TIME(Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
T_TIME_NO_MILLIS(Time.T + Time.TIME + Time.TZZ),

WEEK_DATE(Date.WEEK_DATE),
WEEK_DATE_TIME(Date.WEEK_DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
WEEK_DATE_TIME_NO_MILLIS(Date.WEEK_DATE + Time.T + Time.TIME + Time.TZZ),

// Note: input mapping is "weekyear", but output value is "week_year"
WEEK_YEAR(Date.WEEKYEAR),
WEEKYEAR_WEEK(Date.WEEKYEAR_WEEK),
WEEKYEAR_WEEK_DAY(Date.WEEK_DATE),

YEAR(Date.YEAR),
YEAR_MONTH(Date.YEAR_MONTH),
YEAR_MONTH_DAY(Date.DATE);

private static class Date {
static String BASIC_DATE = "yyyyMMdd";
static String BASIC_ORDINAL_DATE = "yyyyDDD";
static String BASIC_WEEK_DATE = "YYYY'W'wwu";

static String DATE = "yyyy-MM-dd";
static String ORDINAL_DATE = "yyyy-DDD";

static String YEAR = "yyyy";
static String YEAR_MONTH = "yyyy-MM";

static String WEEK_DATE = "YYYY-'W'ww-u";
static String WEEKYEAR = "YYYY";
static String WEEKYEAR_WEEK = "YYYY-'W'ww";
}

private static class Time {
static String T = "'T'";
static String BASIC_TIME = "HHmmss";
static String TIME = "HH:mm:ss";

static String HOUR = "HH";
static String HOUR_MINUTE = "HH:mm";

static String MILLIS = ".SSS";
static String TZ = "Z";
static String TZZ = "XX";
}

private String formatString;

DateFormat(String formatString) {
this.formatString = formatString;
}

public String getFormatString() {
return formatString;
}

public String nameLowerCase() {
return name().toLowerCase();
}

public static String getFormattedDate(java.util.Date date, String dateFormat) {
Instant instant = date.toInstant();
ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of("Etc/UTC"));
return zdt.format(DateTimeFormatter.ofPattern(dateFormat));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ private ResultSet loadResultSet(Client client, QueryStatement queryStatement, Ob
if (queryStatement instanceof Delete) {
return new DeleteResultSet(client, (Delete) queryStatement, queryResult);
} else if (queryStatement instanceof Query) {
return new SelectResultSet(client, (Query) queryStatement, queryResult, scriptColumnType);
return new SelectResultSet(client, (Query) queryStatement, queryResult, scriptColumnType, formatType);
} else if (queryStatement instanceof IndexStatement) {
IndexStatement statement = (IndexStatement) queryStatement;
StatementType statementType = statement.getStatementType();
Expand Down
Loading