Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.16] [Backport 2.x] Add support for custom date formats and OpenSearch date formats for date fields as part of Lucene query #2851

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions docs/user/general/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,48 @@ Querying such index will provide a response with ``schema`` block as shown below
"status": 200
}

If the sql query contains an `IndexDateField` and a literal value with an operator (such as a term query or a range query), then the literal value can be in the `IndexDateField` format.

.. code-block:: json

{
"mappings" : {
"properties" : {
"release_date" : {
"type" : "date",
"format": "dd-MMM-yy"
}
}
}
}

Querying such an `IndexDateField` (``release_date``) will provide a response with ``schema`` and ``datarows`` blocks as shown below.

.. code-block:: json

{
"query" : "SELECT release_date FROM test_index WHERE release_date = \"03-Jan-21\""
}

.. code-block:: json

{
"schema": [
{
"name": "release_date",
"type": "date"
}
],
"datarows": [
[
"2021-01-03"
]
],
"total": 1,
"size": 1,
"status": 200
}

String Data Types
=================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,23 @@ public String toString() {
@EqualsAndHashCode.Exclude @Getter protected MappingType mappingType;

// resolved ExprCoreType
protected ExprCoreType exprCoreType;
@Getter protected ExprCoreType exprCoreType;

/**
* Get a simplified type {@link ExprCoreType} if possible. To avoid returning `UNKNOWN` for
* `OpenSearch*Type`s, e.g. for IP, returns itself.
* `OpenSearch*Type`s, e.g. for IP, returns itself. If the `exprCoreType` is {@link
* ExprCoreType#DATE}, {@link ExprCoreType#TIMESTAMP}, {@link ExprCoreType#TIME}, or {@link
* ExprCoreType#UNKNOWN}, it returns the current instance; otherwise, it returns `exprCoreType`.
*
* @return An {@link ExprType}.
*/
public ExprType getExprType() {
if (exprCoreType != ExprCoreType.UNKNOWN) {
return exprCoreType;
}
return this;
return (exprCoreType == ExprCoreType.DATE
|| exprCoreType == ExprCoreType.TIMESTAMP
|| exprCoreType == ExprCoreType.TIME
|| exprCoreType == ExprCoreType.UNKNOWN)
? this
: exprCoreType;
}

/**
Expand Down Expand Up @@ -230,6 +234,9 @@ public String legacyTypeName() {
if (mappingType == null) {
return exprCoreType.typeName();
}
if (mappingType.toString().equalsIgnoreCase("DATE")) {
return exprCoreType.typeName();
}
return mappingType.toString().toUpperCase();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.EqualsAndHashCode;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.time.DateFormatters;
import org.opensearch.common.time.FormatNames;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
Expand Down Expand Up @@ -137,6 +142,11 @@ public class OpenSearchDateType extends OpenSearchDataType {

private static final String CUSTOM_FORMAT_DATE_SYMBOLS = "FecEWwYqQgdMLDyuG";

private static final List<DateFormatter> OPENSEARCH_DEFAULT_FORMATTERS =
Stream.of("strict_date_time_no_millis", "strict_date_optional_time", "epoch_millis")
.map(DateFormatter::forPattern)
.collect(Collectors.toList());

@EqualsAndHashCode.Exclude private final List<String> formats;

private OpenSearchDateType() {
Expand Down Expand Up @@ -235,6 +245,59 @@ public List<DateFormatter> getAllCustomFormatters() {
.collect(Collectors.toList());
}

/**
* Retrieves a list of custom formatters and OpenSearch named formatters defined by the user, and
* attempts to parse the given date/time string using these formatters.
*
* @param dateTime The date/time string to parse.
* @return A ZonedDateTime representing the parsed date/time in UTC, or null if parsing fails.
*/
public ZonedDateTime getParsedDateTime(String dateTime) {
List<DateFormatter> dateFormatters =
Stream.concat(this.getAllNamedFormatters().stream(), this.getAllCustomFormatters().stream())
.collect(Collectors.toList());
ZonedDateTime zonedDateTime = null;

// check if dateFormatters are empty, then set default ones
if (dateFormatters.isEmpty()) {
dateFormatters = OPENSEARCH_DEFAULT_FORMATTERS;
}
// parse using OpenSearch DateFormatters
for (DateFormatter formatter : dateFormatters) {
try {
TemporalAccessor accessor = formatter.parse(dateTime);
zonedDateTime = DateFormatters.from(accessor).withZoneSameLocal(ZoneOffset.UTC);
break;
} catch (IllegalArgumentException ignored) {
// nothing to do, try another format
}
}
return zonedDateTime;
}

/**
* Returns a formatted date string using the internal formatter, if available.
*
* @param accessor The TemporalAccessor object containing the date/time information.
* @return A formatted date string if a formatter is available, otherwise null.
*/
public String getFormattedDate(TemporalAccessor accessor) {
if (hasNoFormatter()) {
return OPENSEARCH_DEFAULT_FORMATTERS.get(0).format(accessor);
}
// Use the first available format string to create the formatter
return DateFormatter.forPattern(this.formats.get(0)).format(accessor);
}

/**
* Checks if the formatter is not initialized.
*
* @return True if the formatter is not set, otherwise false.
*/
public boolean hasNoFormatter() {
return this.formats.isEmpty();
}

/**
* Retrieves a list of named formatters that format for dates.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ private Optional<ExprType> type(String field) {
private static ExprValue parseDateTimeString(String value, OpenSearchDateType dataType) {
List<DateFormatter> formatters = dataType.getAllNamedFormatters();
formatters.addAll(dataType.getAllCustomFormatters());
ExprCoreType returnFormat = (ExprCoreType) dataType.getExprType();
ExprCoreType returnFormat = dataType.getExprCoreType();

for (DateFormatter formatter : formatters) {
try {
Expand Down Expand Up @@ -268,8 +268,7 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da

private static ExprValue createOpenSearchDateType(Content value, ExprType type) {
OpenSearchDateType dt = (OpenSearchDateType) type;
ExprType returnFormat = dt.getExprType();

ExprCoreType returnFormat = dt.getExprCoreType();
if (value.isNumber()) { // isNumber
var numFormatters = dt.getNumericNamedFormatters();
if (numFormatters.size() > 0 || !dt.hasFormats()) {
Expand All @@ -282,7 +281,7 @@ private static ExprValue createOpenSearchDateType(Content value, ExprType type)
epochMillis = value.longValue();
}
Instant instant = Instant.ofEpochMilli(epochMillis);
switch ((ExprCoreType) returnFormat) {
switch (returnFormat) {
case TIME:
return new ExprTimeValue(LocalTime.from(instant.atZone(UTC_ZONE_ID)));
case DATE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.expression.NamedExpression;
import org.opensearch.sql.expression.span.SpanExpression;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;
import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer;

/** Bucket Aggregation Builder. */
Expand Down Expand Up @@ -66,7 +67,10 @@ private CompositeValuesSourceBuilder<?> buildCompositeValuesSourceBuilder(
.missingOrder(missingOrder)
.order(sortOrder);
// Time types values are converted to LONG in ExpressionAggregationScript::execute
if (List.of(TIMESTAMP, TIME, DATE, DATETIME).contains(expr.getDelegated().type())) {
if ((expr.getDelegated().type() instanceof OpenSearchDateType
&& List.of(TIMESTAMP, TIME, DATE, DATETIME)
.contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType()))
|| List.of(TIMESTAMP, TIME, DATE, DATETIME).contains(expr.getDelegated().type())) {
sourceBuilder.userValuetypeHint(ValueType.LONG);
}
return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script);
Expand Down
Loading
Loading