Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Report date data as a standardized format #367

Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ dependencies {
compile group: "org.elasticsearch.plugin", name: 'reindex-client', version: "${es_version}"
compile group: 'com.google.guava', name: 'guava', version:'15.0'
compile group: 'org.json', name: 'json', version:'20180813'
compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.9'

// ANTLR gradle plugin and runtime dependency
antlr "org.antlr:antlr4:4.7.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -91,6 +92,7 @@ public class LocalClusterState {
*/
private final Map<String, Object> latestSettings = new ConcurrentHashMap<>();

private Map<String, String> fieldAliasMap = new HashMap<>();

public static synchronized LocalClusterState state() {
if (INSTANCE == null) {
Expand Down Expand Up @@ -209,6 +211,27 @@ public IndexMappings getFieldMappings(String[] indices, String[] types,
}
}

/**
* Save an alias for the provided field, which may be used in place of the field name in certain circumstances.
*
* @param alias The name of the alias for the field.
* @param field The base field for the alias.
*/
public void putAliasInMap(String alias, String field) {
fieldAliasMap.put(alias, field);
}

/**
* Get the base field for a given alias.
*
* @param alias The alias for which to find the base field.
* @return The base field for the alias.
*/
public String getFieldForAlias(String alias) {
return fieldAliasMap.get(alias);
}


private String[] resolveIndexExpression(ClusterState state, String[] indices) {
String[] concreteIndices = resolver.concreteIndexNames(state, IndicesOptions.strictExpandOpen(), indices);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.executor.format;

import com.amazon.opendistroforelasticsearch.sql.esdomain.LocalClusterState;
import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.FieldMappings;
import com.amazon.opendistroforelasticsearch.sql.esdomain.mapping.TypeMappings;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
* Formatter to transform date fields into a consistent format for consumption by clients.
*/
public class DateFieldFormatter {
dai-chen marked this conversation as resolved.
Show resolved Hide resolved
private static final Logger LOG = LogManager.getLogger(DateFieldFormatter.class);
private static final String FORMAT_JDBC = "yyyy-MM-dd HH:mm:ss.SSS";

private static final String FORMAT_DOT_DATE_AND_TIME = "yyyy-MM-dd'T'HH:mm:ss.SSSZ";
private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_LOGS_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ss";
private static final String FORMAT_DOT_KIBANA_SAMPLE_DATA_ECOMMERCE_EXCEPTION = "yyyy-MM-dd'T'HH:mm:ssXXX";
private static final String FORMAT_DOT_DATE = "yyyy-MM-dd";
penghuo marked this conversation as resolved.
Show resolved Hide resolved

private final Map<String, String> dateFieldFormatMap;
private List<Schema.Column> dateColumns;

public DateFieldFormatter(String indexName, List<Schema.Column> columns) {
this.dateFieldFormatMap = getDateFieldFormatMap(indexName);
this.dateColumns = getDateColumns(columns);
}

@VisibleForTesting
protected DateFieldFormatter(Map<String, String> dateFieldFormatMap, List<Schema.Column> columns) {
this.dateFieldFormatMap = dateFieldFormatMap;
this.dateColumns = getDateColumns(columns);
}

/**
* Apply the JDBC date format ({@code yyyy-MM-dd HH:mm:ss.SSS}) to date values in the current row.
*
* @param rowSource The row in which to format the date values.
*/
public void applyJDBCDateFormat(Map<String, Object> rowSource) {
for (Schema.Column column : dateColumns) {
String columnName = column.getName();

String columnFormat;
columnFormat = getFormatForColumn(columnName);
if (columnFormat == null) {
LOG.warn("Could not determine date format for column {}; returning original value", columnName);
continue;
}
DateFormat format = DateFormat.valueOf(columnFormat.toUpperCase());
penghuo marked this conversation as resolved.
Show resolved Hide resolved

Object columnOriginalDate = rowSource.get(columnName);
if (columnOriginalDate == null) {
// Don't try to parse null date values
continue;
}

Date date = parseDateString(format, columnOriginalDate.toString());
if (date != null) {
rowSource.put(columnName, DateFormat.getFormattedDate(date, FORMAT_JDBC));
} else {
LOG.warn("Could not parse date value; returning original value");
}
}
}

private String getFormatForColumn(String columnName) {
// Handle special cases for column names
if (LocalClusterState.state().getFieldForAlias(columnName) != null) {
// Column was aliased, and we need to find the base name for the column
columnName = LocalClusterState.state().getFieldForAlias(columnName);
} else if (columnName.split("\\.").length == 2) {
// Column is part of a join, and is qualified by the table alias
columnName = columnName.split("\\.")[1];
}
return dateFieldFormatMap.get(columnName);
}

private List<Schema.Column> getDateColumns(List<Schema.Column> columns) {
return columns.stream()
.filter(column -> column.getType().equals(Schema.Type.DATE.nameLowerCase()))
.collect(Collectors.toList());
}

private Map<String, String> getDateFieldFormatMap(String indexName) {
LocalClusterState state = LocalClusterState.state();
Map<String, String> formatMap = new HashMap<>();

String[] indices = indexName.split("\\|");
Collection<TypeMappings> typeProperties = state.getFieldMappings(indices)
.allMappings();

for (TypeMappings mappings: typeProperties) {
FieldMappings fieldMappings = mappings.firstMapping();
for (Map.Entry<String, Map<String, Object>> field : fieldMappings.data().entrySet()) {
String fieldName = field.getKey();
Map<String, Object> properties = field.getValue();

if (properties.containsKey("format")) {
penghuo marked this conversation as resolved.
Show resolved Hide resolved
formatMap.put(fieldName, properties.get("format").toString());
} else {
// Give all field types a format, since operations such as casts
// can change the output type for a field to `date`.
formatMap.put(fieldName, "date_optional_time");
}
}
}

return formatMap;
}

private Date parseDateString(DateFormat format, String columnOriginalDate) {
try {
switch (format) {
case DATE_OPTIONAL_TIME:
return DateUtils.parseDate(
columnOriginalDate,
FORMAT_DOT_KIBANA_SAMPLE_DATA_LOGS_EXCEPTION,
FORMAT_DOT_KIBANA_SAMPLE_DATA_FLIGHTS_EXCEPTION,
FORMAT_DOT_KIBANA_SAMPLE_DATA_ECOMMERCE_EXCEPTION,
FORMAT_DOT_DATE_AND_TIME,
FORMAT_DOT_DATE);
case EPOCH_MILLIS:
return new Date(Long.parseLong(columnOriginalDate));
case EPOCH_SECOND:
return new Date(Long.parseLong(columnOriginalDate) * 1000);
default:
return DateUtils.parseDate(columnOriginalDate, format.getFormatString());
}
} catch (ParseException e) {
LOG.error(
String.format("Error parsing date string %s as %s", columnOriginalDate, format.nameLowerCase()),
e);
}
return null;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.executor.format;

import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;

public enum DateFormat {
// Special cases that are parsed separately
DATE_OPTIONAL_TIME(""),
EPOCH_MILLIS(""),
EPOCH_SECOND(""),

BASIC_DATE(Date.BASIC_DATE),
BASIC_DATE_TIME(Date.BASIC_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_DATE_TIME_NO_MILLIS(Date.BASIC_DATE + Time.T + Time.BASIC_TIME + Time.TZ),

BASIC_ORDINAL_DATE(Date.BASIC_ORDINAL_DATE),
BASIC_ORDINAL_DATE_TIME(Date.BASIC_ORDINAL_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_ORDINAL_DATE_TIME_NO_MILLIS(Date.BASIC_ORDINAL_DATE+ Time.T + Time.BASIC_TIME + Time.TZ),

BASIC_TIME(Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_TIME_NO_MILLIS(Time.BASIC_TIME + Time.TZ),

BASIC_T_TIME(Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_T_TIME_NO_MILLIS(Time.T + Time.BASIC_TIME + Time.TZ),

BASIC_WEEK_DATE(Date.BASIC_WEEK_DATE),
BASIC_WEEK_DATE_TIME(Date.BASIC_WEEK_DATE + Time.T + Time.BASIC_TIME + Time.MILLIS + Time.TZ),
BASIC_WEEK_DATE_TIME_NO_MILLIS(Date.BASIC_WEEK_DATE + Time.T + Time.BASIC_TIME + Time.TZ),

DATE(Date.DATE),
DATE_HOUR(Date.DATE + Time.T + Time.HOUR),
DATE_HOUR_MINUTE(Date.DATE + Time.T + Time.HOUR_MINUTE),
DATE_HOUR_MINUTE_SECOND(Date.DATE + Time.T + Time.TIME),
DATE_HOUR_MINUTE_SECOND_FRACTION(Date.DATE + Time.T + Time.TIME + Time.MILLIS),
DATE_HOUR_MINUTE_SECOND_MILLIS(Date.DATE + Time.T + Time.TIME + Time.MILLIS),
DATE_TIME(Date.DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
DATE_TIME_NO_MILLIS(Date.DATE + Time.T + Time.TIME + Time.TZZ),

HOUR(Time.HOUR),
HOUR_MINUTE(Time.HOUR_MINUTE),
HOUR_MINUTE_SECOND(Time.TIME),
HOUR_MINUTE_SECOND_FRACTION(Time.TIME + Time.MILLIS),
HOUR_MINUTE_SECOND_MILLIS(Time.TIME + Time.MILLIS),

ORDINAL_DATE(Date.ORDINAL_DATE),
ORDINAL_DATE_TIME(Date.ORDINAL_DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
ORDINAL_DATE_TIME_NO_MILLIS(Date.ORDINAL_DATE + Time.T + Time.TIME + Time.TZZ),

TIME(Time.TIME + Time.MILLIS + Time.TZZ),
TIME_NO_MILLIS(Time.TIME + Time.TZZ),

T_TIME(Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
T_TIME_NO_MILLIS(Time.T + Time.TIME + Time.TZZ),

WEEK_DATE(Date.WEEK_DATE),
WEEK_DATE_TIME(Date.WEEK_DATE + Time.T + Time.TIME + Time.MILLIS + Time.TZZ),
WEEK_DATE_TIME_NO_MILLIS(Date.WEEK_DATE + Time.T + Time.TIME + Time.TZZ),

// Note: input mapping is "weekyear", but output value is "week_year"
WEEK_YEAR(Date.WEEKYEAR),
WEEKYEAR_WEEK(Date.WEEKYEAR_WEEK),
WEEKYEAR_WEEK_DAY(Date.WEEK_DATE),

YEAR(Date.YEAR),
YEAR_MONTH(Date.YEAR_MONTH),
YEAR_MONTH_DAY(Date.DATE);

private static class Date {
static String BASIC_DATE = "yyyyMMdd";
static String BASIC_ORDINAL_DATE = "yyyyDDD";
static String BASIC_WEEK_DATE = "YYYY'W'wwu";

static String DATE = "yyyy-MM-dd";
static String ORDINAL_DATE = "yyyy-DDD";

static String YEAR = "yyyy";
static String YEAR_MONTH = "yyyy-MM";

static String WEEK_DATE = "YYYY-'W'ww-u";
static String WEEKYEAR = "YYYY";
static String WEEKYEAR_WEEK = "YYYY-'W'ww";
}

private static class Time {
static String T = "'T'";
static String BASIC_TIME = "HHmmss";
static String TIME = "HH:mm:ss";

static String HOUR = "HH";
static String HOUR_MINUTE = "HH:mm";

static String MILLIS = ".SSS";
static String TZ = "Z";
static String TZZ = "XX";
}

private String formatString;

DateFormat(String formatString) {
this.formatString = formatString;
}

public String getFormatString() {
return formatString;
}

public String nameLowerCase() {
return name().toLowerCase();
}

public static String getFormattedDate(java.util.Date date, String dateFormat) {
Instant instant = date.toInstant();
ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of("Etc/UTC"));
return zdt.format(DateTimeFormatter.ofPattern(dateFormat));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ private ResultSet loadResultSet(Client client, QueryStatement queryStatement, Ob
if (queryStatement instanceof Delete) {
return new DeleteResultSet(client, (Delete) queryStatement, queryResult);
} else if (queryStatement instanceof Query) {
return new SelectResultSet(client, (Query) queryStatement, queryResult, scriptColumnType);
return new SelectResultSet(client, (Query) queryStatement, queryResult, scriptColumnType, formatType);
} else if (queryStatement instanceof IndexStatement) {
IndexStatement statement = (IndexStatement) queryStatement;
StatementType statementType = statement.getStatementType();
Expand Down
Loading