diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 3c736fdcdf..a238d0487d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -497,6 +497,12 @@ public static FunctionExpression module(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.MODULES, expressions); } + + public static FunctionExpression str_to_date(FunctionProperties functionProperties, + Expression... expressions) { + return compile(functionProperties, BuiltinFunctionName.STR_TO_DATE, expressions); + } + public static FunctionExpression sec_to_time(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.SEC_TO_TIME, expressions); } diff --git a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFormatterUtil.java b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFormatterUtil.java index c5efb2dc5f..55bfa67f3f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFormatterUtil.java +++ b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFormatterUtil.java @@ -6,17 +6,26 @@ package org.opensearch.sql.expression.datetime; import com.google.common.collect.ImmutableMap; +import java.text.ParsePosition; import java.time.Clock; +import java.time.DateTimeException; import java.time.LocalDate; import java.time.LocalDateTime; +import java.time.LocalTime; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.ResolverStyle; +import java.time.temporal.ChronoField; +import java.time.temporal.TemporalAccessor; import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.opensearch.sql.data.model.ExprDatetimeValue; import org.opensearch.sql.data.model.ExprNullValue; import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.function.FunctionProperties; /** * This class converts a SQL style DATE_FORMAT format specifier and converts it to a @@ -28,6 +37,7 @@ class DateTimeFormatterUtil { private static final String SUFFIX_SPECIAL_TH = "th"; private static final String NANO_SEC_FORMAT = "'%06d'"; + private static final Map SUFFIX_CONVERTER = ImmutableMap.builder() .put(1, "st").put(2, "nd").put(3, "rd").build(); @@ -122,6 +132,43 @@ interface DateTimeFormatHandler { .put("%x", (date) -> null) .build(); + private static final Map STR_TO_DATE_FORMATS = + ImmutableMap.builder() + .put("%a", "EEE") // %a => EEE - Abbreviated weekday name (Sun..Sat) + .put("%b", "LLL") // %b => LLL - Abbreviated month name (Jan..Dec) + .put("%c", "M") // %c => MM - Month, numeric (0..12) + .put("%d", "d") // %d => dd - Day of the month, numeric (00..31) + .put("%e", "d") // %e => d - Day of the month, numeric (0..31) + .put("%H", "H") // %H => HH - (00..23) + .put("%h", "H") // %h => hh - (01..12) + .put("%I", "h") // %I => hh - (01..12) + .put("%i", "m") // %i => mm - Minutes, numeric (00..59) + .put("%j", "DDD") // %j => DDD - (001..366) + .put("%k", "H") // %k => H - (0..23) + .put("%l", "h") // %l => h - (1..12) + .put("%p", "a") // %p => a - AM or PM + .put("%M", "LLLL") // %M => LLLL - Month name (January..December) + .put("%m", "M") // %m => MM - Month, numeric (00..12) + .put("%r", "hh:mm:ss a") // %r => hh:mm:ss a - hh:mm:ss followed by AM or PM + .put("%S", "s") // %S => ss - Seconds (00..59) + .put("%s", "s") // %s => ss - Seconds (00..59) + .put("%T", "HH:mm:ss") // %T => HH:mm:ss + .put("%W", "EEEE") // %W => EEEE - Weekday name (Sunday..Saturday) + .put("%Y", "u") // %Y => yyyy - Year, numeric, 4 digits + .put("%y", "u") // %y => yy - Year, numeric, 2 digits + .put("%f", "n") // %f => n - Nanoseconds + //The following have been implemented but cannot be aligned with + // MySQL due to the limitations of the DatetimeFormatter + .put("%D", "d") // %w - Day of month with English suffix + .put("%w", "e") // %w - Day of week (0 indexed) + .put("%U", "w") // %U Week where Sunday is the first day - WEEK() mode 0 + .put("%u", "w") // %u Week where Monday is the first day - WEEK() mode 1 + .put("%V", "w") // %V Week where Sunday is the first day - WEEK() mode 2 + .put("%v", "w") // %v Week where Monday is the first day - WEEK() mode 3 + .put("%X", "u") // %X Year for week where Sunday is the first day + .put("%x", "u") // %x Year for week where Monday is the first day + .build(); + private static final Pattern pattern = Pattern.compile("%."); private static final Pattern CHARACTERS_WITH_NO_MOD_LITERAL_BEHIND_PATTERN = Pattern.compile("(? handler, LocalDateTime datetime) { - final StringBuffer cleanFormat = new StringBuffer(); - final Matcher m = CHARACTERS_WITH_NO_MOD_LITERAL_BEHIND_PATTERN - .matcher(formatExpr.stringValue()); - - while (m.find()) { - m.appendReplacement(cleanFormat,String.format("'%s'", m.group())); - } - m.appendTail(cleanFormat); + StringBuffer cleanFormat = getCleanFormat(formatExpr); final Matcher matcher = pattern.matcher(cleanFormat.toString()); final StringBuffer format = new StringBuffer(); @@ -201,6 +254,84 @@ static ExprValue getFormattedTime(ExprValue timeExpr, ExprValue formatExpr) { return getFormattedString(formatExpr, TIME_HANDLERS, time); } + private static boolean canGetDate(TemporalAccessor ta) { + return (ta.isSupported(ChronoField.YEAR) + && ta.isSupported(ChronoField.MONTH_OF_YEAR) + && ta.isSupported(ChronoField.DAY_OF_MONTH)); + } + + private static boolean canGetTime(TemporalAccessor ta) { + return (ta.isSupported(ChronoField.HOUR_OF_DAY) + && ta.isSupported(ChronoField.MINUTE_OF_HOUR) + && ta.isSupported(ChronoField.SECOND_OF_MINUTE)); + } + + static ExprValue parseStringWithDateOrTime(FunctionProperties fp, + ExprValue datetimeStringExpr, + ExprValue formatExpr) { + + //Replace patterns with % for Java DateTimeFormatter + StringBuffer cleanFormat = getCleanFormat(formatExpr); + final Matcher matcher = pattern.matcher(cleanFormat.toString()); + final StringBuffer format = new StringBuffer(); + + while (matcher.find()) { + matcher.appendReplacement(format, + STR_TO_DATE_FORMATS.getOrDefault(matcher.group(), + String.format("'%s'", matcher.group().replaceFirst(MOD_LITERAL, "")))); + } + matcher.appendTail(format); + + TemporalAccessor taWithMissingFields; + //Return NULL for invalid parse in string to align with MySQL + try { + //Get Temporal Accessor to initially parse string without default values + taWithMissingFields = new DateTimeFormatterBuilder() + .appendPattern(format.toString()) + .toFormatter().withResolverStyle(ResolverStyle.STRICT) + .parseUnresolved(datetimeStringExpr.stringValue(), new ParsePosition(0)); + if (taWithMissingFields == null) { + throw new DateTimeException("Input string could not be parsed properly."); + } + if (!canGetDate(taWithMissingFields) && !canGetTime(taWithMissingFields)) { + throw new DateTimeException("Not enough data to build a valid Date, Time, or Datetime."); + } + } catch (DateTimeException e) { + return ExprNullValue.of(); + } + + int year = taWithMissingFields.isSupported(ChronoField.YEAR) + ? taWithMissingFields.get(ChronoField.YEAR) : 2000; + + int month = taWithMissingFields.isSupported(ChronoField.MONTH_OF_YEAR) + ? taWithMissingFields.get(ChronoField.MONTH_OF_YEAR) : 1; + + int day = taWithMissingFields.isSupported(ChronoField.DAY_OF_MONTH) + ? taWithMissingFields.get(ChronoField.DAY_OF_MONTH) : 1; + + int hour = taWithMissingFields.isSupported(ChronoField.HOUR_OF_DAY) + ? taWithMissingFields.get(ChronoField.HOUR_OF_DAY) : 0; + + int minute = taWithMissingFields.isSupported(ChronoField.MINUTE_OF_HOUR) + ? taWithMissingFields.get(ChronoField.MINUTE_OF_HOUR) : 0; + + int second = taWithMissingFields.isSupported(ChronoField.SECOND_OF_MINUTE) + ? taWithMissingFields.get(ChronoField.SECOND_OF_MINUTE) : 0; + + //Fill returned datetime with current date if only Time information was parsed + LocalDateTime output; + if (!canGetDate(taWithMissingFields)) { + output = LocalDateTime.of( + LocalDate.now(fp.getQueryStartClock()), + LocalTime.of(hour, minute, second) + ); + } else { + output = LocalDateTime.of(year, month, day, hour, minute, second); + } + + return new ExprDatetimeValue(output); + } + /** * Returns English suffix of incoming value. * @param val Incoming value. diff --git a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java index 0856d2c1fe..9fa3e03187 100644 --- a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java @@ -212,6 +212,7 @@ public void register(BuiltinFunctionRepository repository) { repository.register(second(BuiltinFunctionName.SECOND_OF_MINUTE)); repository.register(subdate()); repository.register(subtime()); + repository.register(str_to_date()); repository.register(sysdate()); repository.register(time()); repository.register(time_format()); @@ -810,6 +811,18 @@ private DefaultFunctionResolver subtime() { ); } + /** + * Extracts a date, time, or datetime from the given string. + * It accomplishes this using another string which specifies the input format. + */ + private DefaultFunctionResolver str_to_date() { + return define(BuiltinFunctionName.STR_TO_DATE.getName(), + implWithProperties( + nullMissingHandlingWithProperties((functionProperties, arg, format) + -> DateTimeFunction.exprStrToDate(functionProperties, arg, format)), + DATETIME, STRING, STRING)); + } + /** * Extracts the time part of a date and time value. * Also to construct a time type. The supported signatures: @@ -1718,6 +1731,12 @@ private ExprValue exprSubTime(FunctionProperties functionProperties, return exprApplyTime(functionProperties, temporal, temporalDelta, false); } + private ExprValue exprStrToDate(FunctionProperties fp, + ExprValue dateTimeExpr, + ExprValue formatStringExp) { + return DateTimeFormatterUtil.parseStringWithDateOrTime(fp, dateTimeExpr, formatStringExp); + } + /** * Time implementation for ExprValue. * diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 7b6d364c0a..5dce22f859 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -102,6 +102,7 @@ public enum BuiltinFunctionName { SEC_TO_TIME(FunctionName.of("sec_to_time")), SECOND(FunctionName.of("second")), SECOND_OF_MINUTE(FunctionName.of("second_of_minute")), + STR_TO_DATE(FunctionName.of("str_to_date")), SUBDATE(FunctionName.of("subdate")), SUBTIME(FunctionName.of("subtime")), TIME(FunctionName.of("time")), diff --git a/core/src/test/java/org/opensearch/sql/expression/datetime/StrToDateTest.java b/core/src/test/java/org/opensearch/sql/expression/datetime/StrToDateTest.java new file mode 100644 index 0000000000..b758331a71 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/datetime/StrToDateTest.java @@ -0,0 +1,212 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.expression.datetime; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; +import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; + +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprDatetimeValue; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprTimeValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.ExpressionTestBase; +import org.opensearch.sql.expression.FunctionExpression; + +class StrToDateTest extends ExpressionTestBase { + + private static Stream getTestDataForStrToDate() { + return Stream.of( + //Date arguments + Arguments.of( + "01,5,2013", + "%d,%m,%Y", + new ExprDatetimeValue("2013-05-01 00:00:00"), + DATETIME), + Arguments.of( + "May 1, 2013", + "%M %d, %Y", + new ExprDatetimeValue("2013-05-01 00:00:00"), + DATETIME), + Arguments.of( + "May 1, 2013 - 9,23,11", + "%M %d, %Y - %h,%i,%s", + new ExprDatetimeValue("2013-05-01 09:23:11"), + DATETIME), + Arguments.of( + "2000,1,1", + "%Y,%m,%d", + new ExprDatetimeValue("2000-01-01 00:00:00"), + DATETIME), + Arguments.of( + "2000,1,1,10", + "%Y,%m,%d,%h", + new ExprDatetimeValue("2000-01-01 10:00:00"), + DATETIME), + Arguments.of( + "2000,1,1,10,11", + "%Y,%m,%d,%h,%i", + new ExprDatetimeValue("2000-01-01 10:11:00"), + DATETIME), + + //Invalid Arguments (should return null) + Arguments.of( + "a09:30:17", + "a%h:%i:%s", + ExprNullValue.of(), + UNDEFINED), + Arguments.of( + "abc", + "abc", + ExprNullValue.of(), + UNDEFINED), + Arguments.of( + "2000,1", + "%Y,%m", + ExprNullValue.of(), + UNDEFINED), + Arguments.of( + "2000,1,10", + "%Y,%m,%h", + ExprNullValue.of(), + UNDEFINED), + Arguments.of( + "2000,1,10,11", + "%Y,%m,%h,%i", + ExprNullValue.of(), + UNDEFINED), + Arguments.of( + "9", + "%m", + ExprNullValue.of(), + UNDEFINED), + Arguments.of( + "9", + "%s", + ExprNullValue.of(), + UNDEFINED) + ); + } + + @ParameterizedTest(name = "{0} | {1}") + @MethodSource("getTestDataForStrToDate") + public void test_str_to_date( + String datetime, + String format, + ExprValue expectedResult, + ExprCoreType expectedType) { + + FunctionExpression expression = DSL.str_to_date( + functionProperties, + DSL.literal(new ExprStringValue(datetime)), + DSL.literal(new ExprStringValue(format))); + + ExprValue result = eval(expression); + + assertEquals(expectedType, result.type()); + assertEquals(expectedResult, result); + } + + private static LocalDateTime getExpectedTimeResult(int hour, int minute, int seconds) { + return LocalDateTime.of( + LocalDate.now().getYear(), + LocalDate.now().getMonthValue(), + LocalDate.now().getDayOfMonth(), + hour, + minute, + seconds + ); + } + + private static Stream getTestDataForStrToDateWithTime() { + return Stream.of( + Arguments.of("9,23,11", "%h,%i,%s"), + Arguments.of("2000,9,23,11", "%Y,%h,%i,%s"), + Arguments.of("2000,3,9,23,11", "%Y,%m,%h,%i,%s") + ); + } + + @ParameterizedTest(name = "{1}") + @MethodSource("getTestDataForStrToDateWithTime") + public void test_str_to_date_with_time_type(String parsed, String format) { + + FunctionExpression expression = DSL.str_to_date( + functionProperties, + DSL.literal(new ExprStringValue(parsed)), + DSL.literal(new ExprStringValue(format))); + + ExprValue result = eval(expression); + + assertEquals(DATETIME, result.type()); + assertEquals(getExpectedTimeResult(9, 23, 11), result.datetimeValue()); + } + + @Test + public void test_str_to_date_with_date_format() { + + LocalDateTime arg = LocalDateTime.of(2023, 2, 27, 10, 11,12); + String format = "%Y,%m,%d %h,%i,%s"; + + FunctionExpression dateFormatExpr = DSL.date_format( + functionProperties, + DSL.literal(new ExprDatetimeValue(arg)), + DSL.literal(new ExprStringValue(format))); + String dateFormatResult = eval(dateFormatExpr).stringValue(); + + FunctionExpression strToDateExpr = DSL.str_to_date( + functionProperties, + DSL.literal(new ExprStringValue(dateFormatResult)), + DSL.literal(new ExprStringValue(format))); + LocalDateTime strToDateResult = eval(strToDateExpr).datetimeValue(); + + assertEquals(arg, strToDateResult); + } + + @Test + public void test_str_to_date_with_time_format() { + final int HOURS = 10; + final int MINUTES = 11; + final int SECONDS = 12; + + LocalTime arg = LocalTime.of(HOURS, MINUTES,SECONDS); + String format = "%h,%i,%s"; + + FunctionExpression dateFormatExpr = DSL.time_format( + functionProperties, + DSL.literal(new ExprTimeValue(arg)), + DSL.literal(new ExprStringValue(format))); + String timeFormatResult = eval(dateFormatExpr).stringValue(); + + FunctionExpression strToDateExpr = DSL.str_to_date( + functionProperties, + DSL.literal(new ExprStringValue(timeFormatResult)), + DSL.literal(new ExprStringValue(format))); + LocalDateTime strToDateResult = eval(strToDateExpr).datetimeValue(); + + assertEquals( + getExpectedTimeResult(HOURS, MINUTES, SECONDS), + strToDateResult); + } + + private ExprValue eval(Expression expression) { + return expression.valueOf(); + } +} diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index b0d10a656c..0c257bef98 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -2324,6 +2324,32 @@ Example:: +--------------------------------------+ +STR_TO_DATE +----------- + +Description +>>>>>>>>>>> + +Usage: str_to_date(string, string) is used to extract a DATETIME from the first argument string using the formats specified in the second argument string. +The input argument must have enough information to be parsed as a DATE, DATETIME, or TIME. +Acceptable string format specifiers are the same as those used in the `DATE_FORMAT`_ function. +It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. Otherwise, it will return a DATETIME with the parsed values (as well as default values for any field that was not parsed). + +Argument type: STRING, STRING + +Return type: DATETIME + +Example:: + + OS> SELECT str_to_date("01,5,2013", "%d,%m,%Y") + fetched rows / total rows = 1/1 + +----------------------------------------+ + | str_to_date("01,5,2013", "%d,%m,%Y") | + |----------------------------------------| + | 2013-05-01 00:00:00 | + +----------------------------------------+ + + SUBDATE ------- diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java index d9cb2b1008..54668492e0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java @@ -868,6 +868,39 @@ public void testSecondFunctionAliasesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } + @Test + public void testStrToDate() throws IOException { + //Ideal case + JSONObject result = executeQuery( + String.format("SELECT str_to_date(CAST(birthdate AS STRING)," + + " '%%Y-%%m-%%d %%h:%%i:%%s') FROM %s LIMIT 2", + TEST_INDEX_BANK)); + verifyDataRows(result, + rows("2017-10-23 00:00:00"), + rows("2017-11-20 00:00:00") + ); + + //Bad string format case + result = executeQuery( + String.format("SELECT str_to_date(CAST(birthdate AS STRING)," + + " '%%Y %%s') FROM %s LIMIT 2", + TEST_INDEX_BANK)); + verifyDataRows(result, + rows((Object) null), + rows((Object) null) + ); + + //bad date format case + result = executeQuery( + String.format("SELECT str_to_date(firstname," + + " '%%Y-%%m-%%d %%h:%%i:%%s') FROM %s LIMIT 2", + TEST_INDEX_BANK)); + verifyDataRows(result, + rows((Object) null), + rows((Object) null) + ); + } + @Test public void testSubDateWithDays() throws IOException { var result = diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index b550d7860f..e41851bd63 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -255,6 +255,7 @@ SIGNUM: 'SIGNUM'; SIN: 'SIN'; SINH: 'SINH'; SQRT: 'SQRT'; +STR_TO_DATE: 'STR_TO_DATE'; SUBDATE: 'SUBDATE'; SUBTIME: 'SUBTIME'; SUBTRACT: 'SUBTRACT'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 7b366d1f6c..2b0597f7a7 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -499,6 +499,7 @@ dateTimeFunctionName | SUBDATE | SUBTIME | SYSDATE + | STR_TO_DATE | TIME | TIME_FORMAT | TIME_TO_SEC diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 10fc70542d..c2c578472c 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -419,6 +419,21 @@ public void can_parse_simple_query_string_relevance_function() { + "flags='AND', quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); } + @Test + public void can_parse_str_to_date() { + assertNotNull(parser.parse( + "SELECT STR_TO_DATE('01,5,2013','%d,%m,%Y')" + )); + + assertNotNull(parser.parse( + "SELECT STR_TO_DATE('a09:30:17','a%h:%i:%s')" + )); + + assertNotNull(parser.parse( + "SELECT STR_TO_DATE('abc','abc');" + )); + } + @Test public void can_parse_query_string_relevance_function() { assertNotNull(parser.parse(