diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/DSL.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/DSL.java index c0ecd829a6..cc2c5e21cf 100644 --- a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/DSL.java +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/DSL.java @@ -273,6 +273,57 @@ public FunctionExpression module(Expression... expressions) { return function(BuiltinFunctionName.MODULES, expressions); } + public FunctionExpression substr(Expression... expressions) { + return function(BuiltinFunctionName.SUBSTR, expressions); + } + + /** + * Builds a call to the SUBSTRING function, which is registered under its own name. + */ + public FunctionExpression substring(Expression... expressions) { + return function(BuiltinFunctionName.SUBSTRING, expressions); + } + + public FunctionExpression ltrim(Expression... expressions) { + return function(BuiltinFunctionName.LTRIM, expressions); + } + + public FunctionExpression rtrim(Expression... expressions) { + return function(BuiltinFunctionName.RTRIM, expressions); + } + + public FunctionExpression trim(Expression... expressions) { + return function(BuiltinFunctionName.TRIM, expressions); + } + + public FunctionExpression upper(Expression... expressions) { + return function(BuiltinFunctionName.UPPER, expressions); + } + + public FunctionExpression lower(Expression... expressions) { + return function(BuiltinFunctionName.LOWER, expressions); + } + + public FunctionExpression regexp(Expression... expressions) { + return function(BuiltinFunctionName.REGEXP, expressions); + } + + public FunctionExpression concat(Expression... expressions) { + return function(BuiltinFunctionName.CONCAT, expressions); + } + + public FunctionExpression concat_ws(Expression... expressions) { + return function(BuiltinFunctionName.CONCAT_WS, expressions); + } + + public FunctionExpression length(Expression... 
expressions) { + return function(BuiltinFunctionName.LENGTH, expressions); + } + + public FunctionExpression strcmp(Expression... expressions) { + return function(BuiltinFunctionName.STRCMP, expressions); + } + public FunctionExpression and(Expression... expressions) { return function(BuiltinFunctionName.AND, expressions); } diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/config/ExpressionConfig.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/config/ExpressionConfig.java index ae561a7765..6d3a77035c 100644 --- a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/config/ExpressionConfig.java +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/config/ExpressionConfig.java @@ -24,6 +24,7 @@ import com.amazon.opendistroforelasticsearch.sql.expression.operator.arthmetic.MathematicalFunction; import com.amazon.opendistroforelasticsearch.sql.expression.operator.predicate.BinaryPredicateOperator; import com.amazon.opendistroforelasticsearch.sql.expression.operator.predicate.UnaryPredicateOperator; +import com.amazon.opendistroforelasticsearch.sql.expression.text.TextFunction; import java.util.HashMap; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -47,6 +48,7 @@ public BuiltinFunctionRepository functionRepository() { AggregatorFunction.register(builtinFunctionRepository); DateTimeFunction.register(builtinFunctionRepository); IntervalClause.register(builtinFunctionRepository); + TextFunction.register(builtinFunctionRepository); return builtinFunctionRepository; } diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/function/BuiltinFunctionName.java index ca8b17e565..b853b9c855 100644 --- 
a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/function/BuiltinFunctionName.java @@ -94,6 +94,22 @@ public enum BuiltinFunctionName { SUM(FunctionName.of("sum")), COUNT(FunctionName.of("count")), + /** + * Text Functions. + */ + SUBSTR(FunctionName.of("substr")), + SUBSTRING(FunctionName.of("substring")), + RTRIM(FunctionName.of("rtrim")), + LTRIM(FunctionName.of("ltrim")), + TRIM(FunctionName.of("trim")), + UPPER(FunctionName.of("upper")), + LOWER(FunctionName.of("lower")), + REGEXP(FunctionName.of("regexp")), + CONCAT(FunctionName.of("concat")), + CONCAT_WS(FunctionName.of("concat_ws")), + LENGTH(FunctionName.of("length")), + STRCMP(FunctionName.of("strcmp")), + /** * NULL Test. */ diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperator.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperator.java index a45699546a..d08c3fab8f 100644 --- a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperator.java +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperator.java @@ -20,6 +20,7 @@ import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_NULL; import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.BOOLEAN; +import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.INTEGER; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRING; import com.amazon.opendistroforelasticsearch.sql.data.model.ExprBooleanValue; @@ -61,6 +62,7 @@ public static void register(BuiltinFunctionRepository 
repository) { repository.register(gte()); repository.register(like()); repository.register(notLike()); + repository.register(regexp()); } /** @@ -245,6 +247,12 @@ private static FunctionResolver like() { STRING)); } + private static FunctionResolver regexp() { + return FunctionDSL.define(BuiltinFunctionName.REGEXP.getName(), FunctionDSL + .impl(FunctionDSL.nullMissingHandling(OperatorUtils::matchesRegexp), + INTEGER, STRING, STRING)); + } + private static FunctionResolver notLike() { return FunctionDSL.define(BuiltinFunctionName.NOT_LIKE.getName(), FunctionDSL .impl(FunctionDSL.nullMissingHandling( diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/text/TextFunction.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/text/TextFunction.java new file mode 100644 index 0000000000..4b2de00f97 --- /dev/null +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/expression/text/TextFunction.java @@ -0,0 +1,229 @@ +/* + * + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ * + */ + +package com.amazon.opendistroforelasticsearch.sql.expression.text; + +import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.INTEGER; +import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRING; +import static com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionDSL.define; +import static com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionDSL.impl; +import static com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionDSL.nullMissingHandling; + +import com.amazon.opendistroforelasticsearch.sql.data.model.ExprIntegerValue; +import com.amazon.opendistroforelasticsearch.sql.data.model.ExprStringValue; +import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue; +import com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionName; +import com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionRepository; +import com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionName; +import com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionResolver; + +import lombok.experimental.UtilityClass; + + +/** + * The definition of text functions. + * 1) have the clear interface for function define. + * 2) the implementation should rely on ExprValue. + */ +@UtilityClass +public class TextFunction { + private static String EMPTY_STRING = ""; + + /** + * Register String Functions. + * + * @param repository {@link BuiltinFunctionRepository}. 
+ */ + public void register(BuiltinFunctionRepository repository) { + repository.register(substr()); + repository.register(substring()); + repository.register(ltrim()); + repository.register(rtrim()); + repository.register(trim()); + repository.register(lower()); + repository.register(upper()); + repository.register(concat()); + repository.register(concat_ws()); + repository.register(length()); + repository.register(strcmp()); + } + + /** + * Gets substring starting at given point, for optional given length. + * Form of this function using keywords instead of comma delimited variables is not supported. + * Supports following signatures: + * (STRING, INTEGER)/(STRING, INTEGER, INTEGER) -> STRING + */ + private FunctionResolver substringSubstr(FunctionName functionName) { + return define(functionName, + impl(nullMissingHandling(TextFunction::exprSubstrStart), + STRING, STRING, INTEGER), + impl(nullMissingHandling(TextFunction::exprSubstrStartLength), + STRING, STRING, INTEGER, INTEGER)); + } + + private FunctionResolver substring() { + return substringSubstr(BuiltinFunctionName.SUBSTRING.getName()); + } + + private FunctionResolver substr() { + return substringSubstr(BuiltinFunctionName.SUBSTR.getName()); + } + + /** + * Removes leading whitespace from string. + * Supports following signatures: + * STRING -> STRING + */ + private FunctionResolver ltrim() { + return define(BuiltinFunctionName.LTRIM.getName(), + impl(nullMissingHandling((v) -> new ExprStringValue(v.stringValue().stripLeading())), + STRING, STRING)); + } + + /** + * Removes trailing whitespace from string. + * Supports following signatures: + * STRING -> STRING + */ + private FunctionResolver rtrim() { + return define(BuiltinFunctionName.RTRIM.getName(), + impl(nullMissingHandling((v) -> new ExprStringValue(v.stringValue().stripTrailing())), + STRING, STRING)); + } + + /** + * Removes leading and trailing whitespace from string. 
+ * Has option to specify a String to trim instead of whitespace but this is not yet supported. + * Supporting String specification requires finding keywords inside TRIM command. + * Supports following signatures: + * STRING -> STRING + */ + private FunctionResolver trim() { + return define(BuiltinFunctionName.TRIM.getName(), + impl(nullMissingHandling((v) -> new ExprStringValue(v.stringValue().strip())), + STRING, STRING)); + } + + /** + * Converts String to lowercase using locale-independent (Locale.ROOT) case mapping. + * Supports following signatures: + * STRING -> STRING + */ + private FunctionResolver lower() { + return define(BuiltinFunctionName.LOWER.getName(), + impl(nullMissingHandling((v) -> new ExprStringValue( + v.stringValue().toLowerCase(java.util.Locale.ROOT))), STRING, STRING) + ); + } + + /** + * Converts String to uppercase using locale-independent (Locale.ROOT) case mapping. + * Supports following signatures: + * STRING -> STRING + */ + private FunctionResolver upper() { + return define(BuiltinFunctionName.UPPER.getName(), + impl(nullMissingHandling((v) -> new ExprStringValue( + v.stringValue().toUpperCase(java.util.Locale.ROOT))), STRING, STRING) + ); + } + + /** + * TODO: https://github.com/opendistro-for-elasticsearch/sql/issues/710 + * Extend to accept variable argument amounts. + * Concatenates a list of Strings. + * Supports following signatures: + * (STRING, STRING) -> STRING + */ + private FunctionResolver concat() { + return define(BuiltinFunctionName.CONCAT.getName(), + impl(nullMissingHandling((str1, str2) -> + new ExprStringValue(str1.stringValue() + str2.stringValue())), STRING, STRING, STRING)); + } + + /** + * TODO: https://github.com/opendistro-for-elasticsearch/sql/issues/710 + * Extend to accept variable argument amounts. + * Concatenates a list of Strings with a separator string. 
+ * Supports following signatures: + * (STRING, STRING, STRING) -> STRING + */ + private FunctionResolver concat_ws() { + return define(BuiltinFunctionName.CONCAT_WS.getName(), + impl(nullMissingHandling((sep, str1, str2) -> + new ExprStringValue(str1.stringValue() + sep.stringValue() + str2.stringValue())), + STRING, STRING, STRING, STRING)); + } + + /** + * Calculates length of String in bytes, using its UTF-8 encoding. + * Supports following signatures: + * STRING -> INTEGER + */ + private FunctionResolver length() { + return define(BuiltinFunctionName.LENGTH.getName(), + impl(nullMissingHandling((str) -> new ExprIntegerValue(str.stringValue() + .getBytes(java.nio.charset.StandardCharsets.UTF_8).length)), INTEGER, STRING)); + } + + /** + * Does String comparison of two Strings and returns Integer value. + * Supports following signatures: + * (STRING, STRING) -> INTEGER + */ + private FunctionResolver strcmp() { + return define(BuiltinFunctionName.STRCMP.getName(), + impl(nullMissingHandling((str1, str2) -> + new ExprIntegerValue(Integer.compare( + str1.stringValue().compareTo(str2.stringValue()), 0))), + INTEGER, STRING, STRING)); + } + + private static ExprValue exprSubstrStart(ExprValue exprValue, ExprValue start) { + int startIdx = start.integerValue(); + if (startIdx == 0) { + return new ExprStringValue(EMPTY_STRING); + } + String str = exprValue.stringValue(); + return exprSubStr(str, startIdx, str.length()); + } + + private static ExprValue exprSubstrStartLength( + ExprValue exprValue, ExprValue start, ExprValue length) { + int startIdx = start.integerValue(); + int len = length.integerValue(); + if ((startIdx == 0) || (len == 0)) { + return new ExprStringValue(EMPTY_STRING); + } + String str = exprValue.stringValue(); + return exprSubStr(str, startIdx, len); + } + + private static ExprValue exprSubStr(String str, int start, int len) { + // Map the 1-based (or negative, counted from the end) start to a 0-based index + start = (start > 0) ? 
(start - 1) : (str.length() + start); + // A start outside the string or a negative length yields "" instead of an exception + if ((start < 0) || (start >= str.length()) || (len < 0)) { + return new ExprStringValue(EMPTY_STRING); + } else if (len > (str.length() - start)) { + return new ExprStringValue(str.substring(start)); + } + return new ExprStringValue(str.substring(start, start + len)); + } +} + diff --git a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/OperatorUtils.java b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/OperatorUtils.java index d9b3352c69..d887d5c391 100644 --- a/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/OperatorUtils.java +++ b/core/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/OperatorUtils.java @@ -16,6 +16,7 @@ package com.amazon.opendistroforelasticsearch.sql.utils; import com.amazon.opendistroforelasticsearch.sql.data.model.ExprBooleanValue; +import com.amazon.opendistroforelasticsearch.sql.data.model.ExprIntegerValue; import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue; import java.util.regex.Pattern; import lombok.experimental.UtilityClass; @@ -35,6 +36,17 @@ public static ExprBooleanValue matches(ExprValue text, ExprValue pattern) { .matches()); } + /** + * Checks if the entire text matches the regular expression pattern. + * @param text string value to test. + * @param pattern string pattern to match. + * @return integer value 1 if text matches pattern; otherwise 0. + */ + public static ExprIntegerValue matchesRegexp(ExprValue text, ExprValue pattern) { + return new ExprIntegerValue(Pattern.compile(pattern.stringValue()).matcher(text.stringValue()) + .matches() ? 
1 : 0); + } + private static final char DEFAULT_ESCAPE = '\\'; private static String patternToRegex(String patternString) { diff --git a/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java b/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java index d58830bf35..3c7dec7f15 100644 --- a/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java +++ b/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java @@ -27,6 +27,7 @@ import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.booleanValue; import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.fromObjectValue; +import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.missingValue; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.BOOLEAN; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.INTEGER; import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRING; @@ -62,13 +63,35 @@ import java.util.Base64; import java.util.List; import java.util.stream.Stream; + +import lombok.AllArgsConstructor; +import lombok.Getter; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.Mock; class BinaryPredicateOperatorTest extends ExpressionTestBase { + private static List STRING_PATTERN_PAIRS = ImmutableList.of( + new StringPatternPair("Michael!", ".*"), + new StringPatternPair("new*\\n*line", "new\\\\*.\\\\*line"), + new StringPatternPair("a", 
"^[a-d]"), + new StringPatternPair("helo", "world"), + new StringPatternPair("a", "A")); + + @AllArgsConstructor + @Getter + static class StringPatternPair { + private final String str; + private final String patt; + + int regExpTest() { + return str.matches(patt) ? 1 : 0; + } + } + private static Stream binaryPredicateArguments() { List booleans = Arrays.asList(true, false); return Lists.cartesianProduct(booleans, booleans).stream() @@ -758,6 +781,20 @@ public void test_not_like() { assertEquals(String.format("not like(\"%s\", \"%s\")", "bob", "bo%"), notLike.toString()); } + @Test + void test_regexp() { + STRING_PATTERN_PAIRS.forEach(this::testRegexpString); + } + + void testRegexpString(StringPatternPair stringPatternPair) { + FunctionExpression expression = dsl.regexp( + DSL.literal(new ExprStringValue(stringPatternPair.getStr())), + DSL.literal(new ExprStringValue(stringPatternPair.getPatt()))); + assertEquals(INTEGER, expression.type()); + assertEquals(stringPatternPair.regExpTest(), expression + .valueOf(valueEnv()).integerValue()); + } + /** * Todo. remove this test cases after script serilization implemented. */ diff --git a/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/text/TextFunctionTest.java b/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/text/TextFunctionTest.java new file mode 100644 index 0000000000..d66c89089f --- /dev/null +++ b/core/src/test/java/com/amazon/opendistroforelasticsearch/sql/expression/text/TextFunctionTest.java @@ -0,0 +1,350 @@ +/* + * + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. 
This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + */ + +package com.amazon.opendistroforelasticsearch.sql.expression.text; + +import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.missingValue; +import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.nullValue; +import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.stringValue; +import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.INTEGER; +import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRING; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.when; + +import com.amazon.opendistroforelasticsearch.sql.data.model.ExprStringValue; +import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue; +import com.amazon.opendistroforelasticsearch.sql.expression.DSL; +import com.amazon.opendistroforelasticsearch.sql.expression.Expression; +import com.amazon.opendistroforelasticsearch.sql.expression.ExpressionTestBase; +import com.amazon.opendistroforelasticsearch.sql.expression.FunctionExpression; +import com.amazon.opendistroforelasticsearch.sql.expression.env.Environment; +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.TestInstantiationException; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +public class TextFunctionTest extends ExpressionTestBase { + @Mock + 
Environment env; + + @Mock + Expression nullRef; + + @Mock + Expression missingRef; + + + private static List SUBSTRING_STRINGS = ImmutableList.of( + new SubstringInfo("", 1, 1, ""), + new SubstringInfo("Quadratically", 5, null, "ratically"), + new SubstringInfo("foobarbar", 4, null, "barbar"), + new SubstringInfo("Quadratically", 5, 6, "ratica"), + new SubstringInfo("Quadratically", 5, 600, "ratically"), + new SubstringInfo("Quadratically", 500, 1, ""), + new SubstringInfo("Quadratically", 500, null, ""), + new SubstringInfo("Sakila", -3, null, "ila"), + new SubstringInfo("Sakila", -5, 3, "aki"), + new SubstringInfo("Sakila", -4, 2, "ki"), + new SubstringInfo("Quadratically", 0, null, ""), + new SubstringInfo("Sakila", 0, 2, ""), + new SubstringInfo("Sakila", 2, 0, ""), + new SubstringInfo("Sakila", 0, 0, "")); + private static List UPPER_LOWER_STRINGS = ImmutableList.of( + "test", " test", "test ", " test ", "TesT", "TEST", " TEST", "TEST ", " TEST ", " ", ""); + private static List STRING_PATTERN_PAIRS = ImmutableList.of( + new StringPatternPair("Michael!", "Michael!"), + new StringPatternPair("hello", "world"), + new StringPatternPair("world", "hello")); + private static List TRIM_STRINGS = ImmutableList.of( + " test", " test", "test ", "test", " test ", "", " "); + private static List> CONCAT_STRING_LISTS = ImmutableList.of( + ImmutableList.of("hello", "world"), + ImmutableList.of("123", "5325")); + + interface SubstrSubstring { + FunctionExpression getFunction(SubstringInfo strInfo); + } + + class Substr implements SubstrSubstring { + public FunctionExpression getFunction(SubstringInfo strInfo) { + FunctionExpression expr; + if (strInfo.getLen() == null) { + expr = dsl.substr(DSL.literal(strInfo.getExpr()), DSL.literal(strInfo.getStart())); + } else { + expr = dsl.substr(DSL.literal(strInfo.getExpr()), + DSL.literal(strInfo.getStart()), + DSL.literal(strInfo.getLen())); + } + return expr; + } + } + + class Substring implements SubstrSubstring { + public 
FunctionExpression getFunction(SubstringInfo strInfo) { + FunctionExpression expr; + if (strInfo.getLen() == null) { + expr = dsl.substring(DSL.literal(strInfo.getExpr()), DSL.literal(strInfo.getStart())); + } else { + expr = dsl.substring(DSL.literal(strInfo.getExpr()), + DSL.literal(strInfo.getStart()), + DSL.literal(strInfo.getLen())); + } + return expr; + } + } + + @AllArgsConstructor + @Getter + static class StringPatternPair { + private final String str; + private final String patt; + + int strCmpTest() { + return Integer.compare(str.compareTo(patt), 0); + } + } + + @AllArgsConstructor + @Getter + static class SubstringInfo { + String expr; + Integer start; + Integer len; + String res; + } + + @BeforeEach + public void setup() { + when(nullRef.valueOf(env)).thenReturn(nullValue()); + when(missingRef.valueOf(env)).thenReturn(missingValue()); + } + + @Test + public void substrSubstring() { + SUBSTRING_STRINGS.forEach(s -> substrSubstringTest(s, new Substr())); + SUBSTRING_STRINGS.forEach(s -> substrSubstringTest(s, new Substring())); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.substr(missingRef, DSL.literal(1)))); + assertEquals(nullValue(), eval(dsl.substr(nullRef, DSL.literal(1)))); + assertEquals(missingValue(), eval(dsl.substring(missingRef, DSL.literal(1)))); + assertEquals(nullValue(), eval(dsl.substring(nullRef, DSL.literal(1)))); + + when(nullRef.type()).thenReturn(INTEGER); + when(missingRef.type()).thenReturn(INTEGER); + assertEquals(missingValue(), eval(dsl.substr(DSL.literal("hello"), missingRef))); + assertEquals(nullValue(), eval(dsl.substr(DSL.literal("hello"), nullRef))); + assertEquals(missingValue(), eval(dsl.substring(DSL.literal("hello"), missingRef))); + assertEquals(nullValue(), eval(dsl.substring(DSL.literal("hello"), nullRef))); + } + + void substrSubstringTest(SubstringInfo strInfo, SubstrSubstring substrSubstring) { + FunctionExpression expr = 
substrSubstring.getFunction(strInfo); + assertEquals(STRING, expr.type()); + assertEquals(strInfo.getRes(), eval(expr).stringValue()); + } + + @Test + public void ltrim() { + TRIM_STRINGS.forEach(this::ltrimString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.ltrim(missingRef))); + assertEquals(nullValue(), eval(dsl.ltrim(nullRef))); + } + + @Test + public void rtrim() { + TRIM_STRINGS.forEach(this::rtrimString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.rtrim(missingRef))); + assertEquals(nullValue(), eval(dsl.rtrim(nullRef))); + } + + @Test + public void trim() { + TRIM_STRINGS.forEach(this::trimString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.trim(missingRef))); + assertEquals(nullValue(), eval(dsl.trim(nullRef))); + } + + void ltrimString(String str) { + FunctionExpression expression = dsl.ltrim(DSL.literal(str)); + assertEquals(STRING, expression.type()); + assertEquals(str.stripLeading(), eval(expression).stringValue()); + } + + void rtrimString(String str) { + FunctionExpression expression = dsl.rtrim(DSL.literal(str)); + assertEquals(STRING, expression.type()); + assertEquals(str.stripTrailing(), eval(expression).stringValue()); + } + + void trimString(String str) { + FunctionExpression expression = dsl.trim(DSL.literal(str)); + assertEquals(STRING, expression.type()); + assertEquals(str.trim(), eval(expression).stringValue()); + } + + @Test + public void lower() { + UPPER_LOWER_STRINGS.forEach(this::testLowerString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.lower(missingRef))); + assertEquals(nullValue(), eval(dsl.lower(nullRef))); + } + + @Test + public void upper() { + 
UPPER_LOWER_STRINGS.forEach(this::testUpperString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.upper(missingRef))); + assertEquals(nullValue(), eval(dsl.upper(nullRef))); + } + + @Test + void concat() { + CONCAT_STRING_LISTS.forEach(this::testConcatString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval( + dsl.concat(missingRef, DSL.literal("1")))); + assertEquals(nullValue(), eval( + dsl.concat(nullRef, DSL.literal("1")))); + assertEquals(missingValue(), eval( + dsl.concat(DSL.literal("1"), missingRef))); + assertEquals(nullValue(), eval( + dsl.concat(DSL.literal("1"), nullRef))); + } + + @Test + void concat_ws() { + CONCAT_STRING_LISTS.forEach(s -> testConcatString(s, ",")); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval( + dsl.concat_ws(missingRef, DSL.literal("1"), DSL.literal("1")))); + assertEquals(nullValue(), eval( + dsl.concat_ws(nullRef, DSL.literal("1"), DSL.literal("1")))); + assertEquals(missingValue(), eval( + dsl.concat_ws(DSL.literal("1"), missingRef, DSL.literal("1")))); + assertEquals(nullValue(), eval( + dsl.concat_ws(DSL.literal("1"), nullRef, DSL.literal("1")))); + assertEquals(missingValue(), eval( + dsl.concat_ws(DSL.literal("1"), DSL.literal("1"), missingRef))); + assertEquals(nullValue(), eval( + dsl.concat_ws(DSL.literal("1"), DSL.literal("1"), nullRef))); + } + + @Test + void length() { + UPPER_LOWER_STRINGS.forEach(this::testLengthString); + + when(nullRef.type()).thenReturn(STRING); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.length(missingRef))); + assertEquals(nullValue(), eval(dsl.length(nullRef))); + } + + @Test + void strcmp() { + STRING_PATTERN_PAIRS.forEach(this::testStcmpString); + + when(nullRef.type()).thenReturn(STRING); + 
when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(dsl.strcmp(missingRef, missingRef))); + assertEquals(nullValue(), eval(dsl.strcmp(nullRef, nullRef))); + assertEquals(missingValue(), eval(dsl.strcmp(nullRef, missingRef))); + assertEquals(missingValue(), eval(dsl.strcmp(missingRef, nullRef))); + } + + void testConcatString(List<String> strings) { + String expected = null; + if (strings.stream().noneMatch(Objects::isNull)) { + expected = String.join("", strings); + } + + FunctionExpression expression = dsl.concat( + DSL.literal(strings.get(0)), DSL.literal(strings.get(1))); + assertEquals(STRING, expression.type()); + assertEquals(expected, eval(expression).stringValue()); + } + + void testConcatString(List<String> strings, String delim) { + String expected = strings.stream() + .filter(Objects::nonNull).collect(Collectors.joining(delim)); + + FunctionExpression expression = dsl.concat_ws( + DSL.literal(delim), DSL.literal(strings.get(0)), DSL.literal(strings.get(1))); + assertEquals(STRING, expression.type()); + assertEquals(expected, eval(expression).stringValue()); + } + + void testLengthString(String str) { + FunctionExpression expression = dsl.length(DSL.literal(new ExprStringValue(str))); + assertEquals(INTEGER, expression.type()); + assertEquals(str.getBytes().length, eval(expression).integerValue()); + } + + void testStcmpString(StringPatternPair stringPatternPair) { + FunctionExpression expression = dsl.strcmp( + DSL.literal(new ExprStringValue(stringPatternPair.getStr())), + DSL.literal(new ExprStringValue(stringPatternPair.getPatt()))); + assertEquals(INTEGER, expression.type()); + assertEquals(stringPatternPair.strCmpTest(), eval(expression).integerValue()); + } + + void testLowerString(String str) { + FunctionExpression expression = dsl.lower(DSL.literal(new ExprStringValue(str))); + assertEquals(STRING, expression.type()); + assertEquals(stringValue(str.toLowerCase()), eval(expression)); + } + + void testUpperString(String str) { + 
FunctionExpression expression = dsl.upper(DSL.literal(new ExprStringValue(str))); + assertEquals(STRING, expression.type()); + assertEquals(stringValue(str.toUpperCase()), eval(expression)); + } + + private ExprValue eval(Expression expression) { + return expression.valueOf(env); + } +} diff --git a/docs/user/dql/expressions.rst b/docs/user/dql/expressions.rst index 8b2fc88dce..861e233d1f 100644 --- a/docs/user/dql/expressions.rst +++ b/docs/user/dql/expressions.rst @@ -103,31 +103,33 @@ Comparison operators are used to compare values. The MISSING and NULL value comp Operators ````````` -+----------------+--------------------------------+ -| name | description | -+----------------+--------------------------------+ -| > | Greater than operator | -+----------------+--------------------------------+ -| >= | Greater than or equal operator | -+----------------+--------------------------------+ -| < | Less than operator | -+----------------+--------------------------------+ -| != | Not equal operator | -+----------------+--------------------------------+ -| <= | Less than or equal operator | -+----------------+--------------------------------+ -| = | Equal operator | -+----------------+--------------------------------+ -| LIKE | Simple pattern matching | -+----------------+--------------------------------+ -| IS NULL | NULL value test | -+----------------+--------------------------------+ -| IS NOT NULL | NOT NULL value test | -+----------------+--------------------------------+ -| IS MISSING | MISSING value test | -+----------------+--------------------------------+ -| IS NOT MISSING | NOT MISSING value test | -+----------------+--------------------------------+ ++----------------+----------------------------------------+ +| name | description | ++----------------+----------------------------------------+ +| > | Greater than operator | ++----------------+----------------------------------------+ +| >= | Greater than or equal operator | 
++----------------+----------------------------------------+ +| < | Less than operator | ++----------------+----------------------------------------+ +| != | Not equal operator | ++----------------+----------------------------------------+ +| <= | Less than or equal operator | ++----------------+----------------------------------------+ +| = | Equal operator | ++----------------+----------------------------------------+ +| LIKE | Simple pattern matching | ++----------------+----------------------------------------+ +| IS NULL | NULL value test | ++----------------+----------------------------------------+ +| IS NOT NULL | NOT NULL value test | ++----------------+----------------------------------------+ +| IS MISSING | MISSING value test | ++----------------+----------------------------------------+ +| IS NOT MISSING | NOT MISSING value test | ++----------------+----------------------------------------+ +| REGEXP | String matches regular expression test | ++----------------+----------------------------------------+ Basic Comparison Operator @@ -169,6 +171,20 @@ Here is an example for null value test:: | False | True | True | False | +-------------+-----------------+----------------+--------------------+ + +REGEXP value test +----------------- + +expr REGEXP pattern. The expr is a string value, and pattern is a regular expression pattern:: + + od> SELECT 'Hello!' REGEXP '.*', 'a' REGEXP 'b'; + fetched rows / total rows = 1/1 + +------------------------+------------------+ + | 'Hello!' 
REGEXP '.*' | 'a' REGEXP 'b' | + |------------------------+------------------| + | 1 | 0 | + +------------------------+------------------+ + Function Call ============= diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index acbf46f482..ec8b4bc830 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -684,6 +684,30 @@ Example:: +-----------+--------------+ +STRCMP +------ + +Description +>>>>>>>>>>> + +Usage: strcmp(str1, str2) returns 0 if the strings are the same, -1 if the first argument is smaller than the second according to the current sort order, and 1 otherwise. + +Argument type: STRING, STRING + +Return type: INTEGER + +Example:: + + od> SELECT STRCMP('hello', 'world'), STRCMP('hello', 'hello') + fetched rows / total rows = 1/1 + +----------------------------+----------------------------+ + | STRCMP('hello', 'world') | STRCMP('hello', 'hello') | + |----------------------------+----------------------------| + | -1 | 0 | + +----------------------------+----------------------------+ + + + SUBTRACT -------- @@ -961,7 +985,22 @@ CONCAT Description >>>>>>>>>>> -Specification is undefined and type check is skipped for now +Usage: CONCAT(str1, str2) returns str1 and str2 concatenated together. + +Argument type: STRING, STRING + +Return type: STRING + +Example:: + + od> SELECT CONCAT('hello', 'world') + fetched rows / total rows = 1/1 + +----------------------------+ + | CONCAT('hello', 'world') | + |----------------------------| + | helloworld | + +----------------------------+ + CONCAT_WS --------- @@ -969,7 +1008,21 @@ CONCAT_WS Description >>>>>>>>>>> -Specification is undefined and type check is skipped for now +Usage: CONCAT_WS(sep, str1, str2) returns str1 concatenated with str2 using sep as a separator between them. 
+ +Argument type: STRING, STRING, STRING + +Return type: STRING + +Example:: + + od> SELECT CONCAT_WS(',', 'hello', 'world') + fetched rows / total rows = 1/1 + +------------------------------------+ + | CONCAT_WS(',', 'hello', 'world') | + |------------------------------------| + | hello,world | + +------------------------------------+ LEFT @@ -993,6 +1046,22 @@ Specifications: 1. LENGTH(STRING) -> INTEGER +Usage: length(str) returns length of string measured in bytes. + +Argument type: STRING + +Return type: INTEGER + +Example:: + + od> SELECT LENGTH('helloworld') + fetched rows / total rows = 1/1 + +------------------------+ + | LENGTH('helloworld') | + |------------------------| + | 10 | + +------------------------+ + LOCATE ------ @@ -1012,10 +1081,21 @@ LOWER Description >>>>>>>>>>> -Specifications: +Usage: lower(string) converts the string to lowercase. + +Argument type: STRING + +Return type: STRING -1. LOWER(STRING T) -> T -2. LOWER(STRING T, STRING) -> T +Example:: + + od> SELECT LOWER('helloworld'), LOWER('HELLOWORLD') + fetched rows / total rows = 1/1 + +-----------------------+-----------------------+ + | LOWER('helloworld') | LOWER('HELLOWORLD') | + |-----------------------+-----------------------| + | helloworld | helloworld | + +-----------------------+-----------------------+ LTRIM @@ -1024,9 +1104,21 @@ LTRIM Description >>>>>>>>>>> -Specifications: +Usage: ltrim(str) trims leading space characters from the string. + +Argument type: STRING -1. LTRIM(STRING T) -> T +Return type: STRING + +Example:: + + od> SELECT LTRIM(' hello'), LTRIM('hello ') + fetched rows / total rows = 1/1 + +---------------------+---------------------+ + | LTRIM(' hello') | LTRIM('hello ') | + |---------------------+---------------------| + | hello | hello | + +---------------------+---------------------+ REPLACE @@ -1057,9 +1149,21 @@ RTRIM Description >>>>>>>>>>> -Specifications: +Usage: rtrim(str) trims trailing space characters from the string. -1. 
RTRIM(STRING T) -> T +Argument type: STRING + +Return type: STRING + +Example:: + + od> SELECT RTRIM(' hello'), RTRIM('hello ') + fetched rows / total rows = 1/1 + +---------------------+---------------------+ + | RTRIM(' hello') | RTRIM('hello ') | + |---------------------+---------------------| + | hello | hello | + +---------------------+---------------------+ SUBSTRING @@ -1068,9 +1172,23 @@ SUBSTRING Description >>>>>>>>>>> -Specifications: +Usage: substring(str, start) or substring(str, start, length) returns the substring defined by start and length. With no length, the entire string from start is returned. -1. SUBSTRING(STRING T, INTEGER, INTEGER) -> T +Argument type: STRING, INTEGER, INTEGER + +Return type: STRING + +Synonyms: SUBSTR + +Example:: + + od> SELECT SUBSTRING('helloworld', 5), SUBSTRING('helloworld', 5, 3) + fetched rows / total rows = 1/1 + +------------------------------+---------------------------------+ + | SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) | + |------------------------------+---------------------------------| + | oworld | owo | + +------------------------------+---------------------------------+ TRIM @@ -1079,9 +1197,19 @@ TRIM Description >>>>>>>>>>> -Specifications: +Argument type: STRING -1. TRIM(STRING T) -> T +Return type: STRING + +Example:: + + od> SELECT TRIM(' hello'), TRIM('hello ') + fetched rows / total rows = 1/1 + +--------------------+--------------------+ + | TRIM(' hello') | TRIM('hello ') | + |--------------------+--------------------| + | hello | hello | + +--------------------+--------------------+ UPPER @@ -1090,11 +1218,21 @@ UPPER Description >>>>>>>>>>> -Specifications: +Usage: upper(string) converts the string to uppercase. -1. 
UPPER(STRING T, STRING) -> T +Argument type: STRING +Return type: STRING + +Example:: + + od> SELECT UPPER('helloworld'), UPPER('HELLOWORLD') + fetched rows / total rows = 1/1 + +-----------------------+-----------------------+ + | UPPER('helloworld') | UPPER('HELLOWORLD') | + |-----------------------+-----------------------| + | HELLOWORLD | HELLOWORLD | + +-----------------------+-----------------------+ Conditional Functions diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java index 1d157d7774..4dc4cc5b8c 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/SQLIntegTestCase.java @@ -37,6 +37,7 @@ import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getPeople2IndexMapping; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getPhraseIndexMapping; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getResponseBody; +import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getStringIndexMapping; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.getWeblogsIndexMapping; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.isIndexExist; import static com.amazon.opendistroforelasticsearch.sql.legacy.TestUtils.loadDataByRestClient; @@ -491,6 +492,10 @@ public enum Index { "account_null", getBankWithNullValuesIndexMapping(), "src/test/resources/bank_with_null_values.json"), + BANK_WITH_STRING_VALUES(TestsConstants.TEST_INDEX_STRINGS, + "strings", + getStringIndexMapping(), + "src/test/resources/strings.json"), ORDER(TestsConstants.TEST_INDEX_ORDER, "_doc", getOrderIndexMapping(), diff --git 
a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java index 1c2d3f5d27..6219b90a5c 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestUtils.java @@ -208,6 +208,11 @@ public static String getBankWithNullValuesIndexMapping() { return getMappingFile(mappingFile); } + public static String getStringIndexMapping() { + String mappingFile = "string_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static String getOrderIndexMapping() { String mappingFile = "order_index_mapping.json"; return getMappingFile(mappingFile); diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java index 0eace4b65d..f742db269d 100644 --- a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/legacy/TestsConstants.java @@ -55,6 +55,7 @@ public class TestsConstants { public final static String TEST_INDEX_DATE = TEST_INDEX + "_date"; public final static String TEST_INDEX_DATE_TIME = TEST_INDEX + "_datetime"; public final static String TEST_INDEX_DEEP_NESTED = TEST_INDEX + "_deep_nested"; + public final static String TEST_INDEX_STRINGS = TEST_INDEX + "_strings"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/ppl/TextCommandIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/ppl/TextCommandIT.java new file mode 100644 index 0000000000..6b4e3182f7 --- /dev/null +++ 
b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/ppl/TextCommandIT.java @@ -0,0 +1,126 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.sql.ppl; + +import static com.amazon.opendistroforelasticsearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.rows; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.schema; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.verifyDataRows; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; + +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class TextCommandIT extends PPLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_STRING_VALUES); + } + + void verifyQuery(String command, String initialArgs, String additionalArgs, + String outputRow1, String outputRow2, String outputRow3) throws IOException { + String query = String.format( + "source=%s | eval f=%s(%sname%s) | fields f", TEST_INDEX_STRINGS, command, initialArgs, additionalArgs); + JSONObject result = executeQuery(query); + verifySchema(result, schema("f", null, "string")); + verifyDataRows(result, rows(outputRow1), rows(outputRow2), rows(outputRow3)); + } + + void 
verifyQuery(String command, String initialArgs, String additionalArgs, + Integer outputRow1, Integer outputRow2, Integer outputRow3) throws IOException { + String query = String.format( + "source=%s | eval f=%s(%sname%s) | fields f", TEST_INDEX_STRINGS, command, initialArgs, additionalArgs); + JSONObject result = executeQuery(query); + verifySchema(result, schema("f", null, "integer")); + verifyDataRows(result, rows(outputRow1), rows(outputRow2), rows(outputRow3)); + } + + void verifyRegexQuery(String pattern, Integer outputRow1, Integer outputRow2, Integer outputRow3) throws IOException { + String query = String.format( + "source=%s | eval f=name regexp '%s' | fields f", TEST_INDEX_STRINGS, pattern); + JSONObject result = executeQuery(query); + verifySchema(result, schema("f", null, "integer")); + verifyDataRows(result, rows(outputRow1), rows(outputRow2), rows(outputRow3)); + } + + @Test + public void testRegexp() throws IOException { + verifyRegexQuery("hello", 1, 0, 0); + verifyRegexQuery(".*", 1, 1, 1); + } + + @Test + public void testSubstr() throws IOException { + verifyQuery("substr", "", ", 2", "ello", "orld", "elloworld"); + verifyQuery("substr", "", ", 2, 2", "el", "or", "el"); + } + + @Test + public void testSubstring() throws IOException { + verifyQuery("substring", "", ", 2", "ello", "orld", "elloworld"); + verifyQuery("substring", "", ", 2, 2", "el", "or", "el"); + } + + @Test + public void testUpper() throws IOException { + verifyQuery("upper", "", "", "HELLO", "WORLD", "HELLOWORLD"); + } + + @Test + public void testLower() throws IOException { + verifyQuery("lower", "", "", "hello", "world", "helloworld"); + } + + @Test + public void testTrim() throws IOException { + verifyQuery("trim", "", "", "hello", "world", "helloworld"); + } + + @Test + public void testRtrim() throws IOException { + verifyQuery("rtrim", "", "", "hello", "world", "helloworld"); + } + + @Test + public void testLtrim() throws IOException { + verifyQuery("ltrim", "", "", "hello", 
"world", "helloworld"); + } + + @Test + public void testConcat() throws IOException { + verifyQuery("concat", "", ", 'there'", + "hellothere", "worldthere", "helloworldthere"); + } + + @Test + public void testConcat_ws() throws IOException { + verifyQuery("concat_ws", "',', ", ", 'there'", + "hello,there", "world,there", "helloworld,there"); + } + + @Test + public void testLength() throws IOException { + verifyQuery("length", "", "", 5, 5, 10); + } + + @Test + public void testStrcmp() throws IOException { + verifyQuery("strcmp", "", ", 'world'", -1, 0, -1); + } +} diff --git a/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/TextFunctionIT.java b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/TextFunctionIT.java new file mode 100644 index 0000000000..1cd3285fe8 --- /dev/null +++ b/integ-test/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/TextFunctionIT.java @@ -0,0 +1,140 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.sql.sql; + +import static com.amazon.opendistroforelasticsearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.rows; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.schema; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.verifyDataRows; +import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.verifySchema; +import static com.amazon.opendistroforelasticsearch.sql.util.TestUtils.getResponseBody; + +import com.amazon.opendistroforelasticsearch.sql.legacy.SQLIntegTestCase; +import com.amazon.opendistroforelasticsearch.sql.util.TestUtils; +import java.io.IOException; +import java.util.Locale; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.Response; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class TextFunctionIT extends SQLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + TestUtils.enableNewQueryEngine(client()); + } + + void verifyQuery(String query, String type, String output) throws IOException { + JSONObject result = executeQuery("select " + query); + verifySchema(result, schema(query, null, type)); + verifyDataRows(result, rows(output)); + } + + void verifyQuery(String query, String type, Integer output) throws IOException { + JSONObject result = executeQuery("select " + query); + verifySchema(result, schema(query, null, type)); + verifyDataRows(result, rows(output)); + } + + @Test + public void testRegexp() throws IOException { + verifyQuery("'a' regexp 'b'", "integer", 0); + verifyQuery("'a' regexp '.*'", "integer", 1); + } + + @Test + public void testSubstr() throws IOException { + verifyQuery("substr('hello', 2)", "string", "ello"); + verifyQuery("substr('hello', 2, 2)", "string", "el"); + } + + @Test + public void 
testSubstring() throws IOException { + verifyQuery("substring('hello', 2)", "string", "ello"); + verifyQuery("substring('hello', 2, 2)", "string", "el"); + } + + @Test + public void testUpper() throws IOException { + verifyQuery("upper('hello')", "string", "HELLO"); + verifyQuery("upper('HELLO')", "string", "HELLO"); + } + + @Test + public void testLower() throws IOException { + verifyQuery("lower('hello')", "string", "hello"); + verifyQuery("lower('HELLO')", "string", "hello"); + } + + @Test + public void testTrim() throws IOException { + verifyQuery("trim(' hello')", "string", "hello"); + verifyQuery("trim('hello ')", "string", "hello"); + verifyQuery("trim(' hello ')", "string", "hello"); + } + + @Test + public void testRtrim() throws IOException { + verifyQuery("rtrim(' hello')", "string", " hello"); + verifyQuery("rtrim('hello ')", "string", "hello"); + verifyQuery("rtrim(' hello ')", "string", " hello"); + } + + @Test + public void testLtrim() throws IOException { + verifyQuery("ltrim(' hello')", "string", "hello"); + verifyQuery("ltrim('hello ')", "string", "hello "); + verifyQuery("ltrim(' hello ')", "string", "hello "); + } + + @Test + public void testConcat() throws IOException { + verifyQuery("concat('hello', 'world')", "string", "helloworld"); + verifyQuery("concat('', 'hello')", "string", "hello"); + } + + @Test + public void testConcat_ws() throws IOException { + verifyQuery("concat_ws(',', 'hello', 'world')", "string", "hello,world"); + verifyQuery("concat_ws(',', '', 'hello')", "string", ",hello"); + } + + @Test + public void testLength() throws IOException { + verifyQuery("length('hello')", "integer", 5); + } + + @Test + public void testStrcmp() throws IOException { + verifyQuery("strcmp('hello', 'world')", "integer", -1); + verifyQuery("strcmp('hello', 'hello')", "integer", 0); + } + + protected JSONObject executeQuery(String query) throws IOException { + Request request = new Request("POST", QUERY_API_ENDPOINT); + 
request.setJsonEntity(String.format(Locale.ROOT, "{\n" + " \"query\": \"%s\"\n" + "}", query)); + + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + + Response response = client().performRequest(request); + return new JSONObject(getResponseBody(response)); + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/string_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/string_index_mapping.json new file mode 100644 index 0000000000..b35a24076d --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/string_index_mapping.json @@ -0,0 +1,10 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "text", + "fielddata": true + } + } + } +} diff --git a/integ-test/src/test/resources/strings.json b/integ-test/src/test/resources/strings.json new file mode 100644 index 0000000000..a57f1194c4 --- /dev/null +++ b/integ-test/src/test/resources/strings.json @@ -0,0 +1,6 @@ +{"index":{"_id":"1"}} +{"name" : "hello"} +{"index":{"_id":"2"}} +{"name" : "world"} +{"index":{"_id":"3"}} +{"name" : "helloworld"} diff --git a/legacy/src/main/antlr/OpenDistroSqlLexer.g4 b/legacy/src/main/antlr/OpenDistroSqlLexer.g4 index 6fdb8980a9..772f3f51d1 100644 --- a/legacy/src/main/antlr/OpenDistroSqlLexer.g4 +++ b/legacy/src/main/antlr/OpenDistroSqlLexer.g4 @@ -113,6 +113,7 @@ SUM: 'SUM'; SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; YEAR: 'YEAR'; +STRCMP: 'STRCMP'; // Keywords, but can be ID diff --git a/legacy/src/main/antlr/OpenDistroSqlParser.g4 b/legacy/src/main/antlr/OpenDistroSqlParser.g4 index 2fa33864e7..61148a695c 100644 --- a/legacy/src/main/antlr/OpenDistroSqlParser.g4 +++ b/legacy/src/main/antlr/OpenDistroSqlParser.g4 @@ -473,7 +473,7 @@ functionNameBase | LOG10 | LOG2 | LOWER | LTRIM | MAKETIME | MODULUS | MONTH | MONTHNAME | MULTIPLY | NOW | PI | POW | POWER | RADIANS | RAND | REPLACE | RIGHT | RINT | 
ROUND | RTRIM | SIGN | SIGNUM | SIN | SINH | SQRT | SUBSTRING | SUBTRACT | TAN | TIMESTAMP | TRIM - | UPPER | YEAR | ADDDATE | ADDTIME | GREATEST | LEAST + | UPPER | YEAR | ADDDATE | ADDTIME | GREATEST | LEAST | STRCMP ; esFunctionNameBase diff --git a/ppl/src/main/antlr/OpenDistroPPLLexer.g4 b/ppl/src/main/antlr/OpenDistroPPLLexer.g4 index 83a2fc61d4..3c8377f103 100644 --- a/ppl/src/main/antlr/OpenDistroPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenDistroPPLLexer.g4 @@ -71,6 +71,7 @@ XOR: 'XOR'; TRUE: 'TRUE'; FALSE: 'FALSE'; LIKE: 'LIKE'; +REGEXP: 'REGEXP'; // DATETIME, INTERVAL AND UNIT KEYWORDS DATETIME: 'DATETIME'; @@ -203,6 +204,20 @@ TIME: 'TIME'; TIMESTAMP: 'TIMESTAMP'; ADDDATE: 'ADDDATE'; +// TEXT FUNCTIONS +SUBSTR: 'SUBSTR'; +SUBSTRING: 'SUBSTRING'; +LTRIM: 'LTRIM'; +RTRIM: 'RTRIM'; +TRIM: 'TRIM'; +TO: 'TO'; +LOWER: 'LOWER'; +UPPER: 'UPPER'; +CONCAT: 'CONCAT'; +CONCAT_WS: 'CONCAT_WS'; +LENGTH: 'LENGTH'; +STRCMP: 'STRCMP'; + // LITERALS AND VALUES //STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; ID: ID_LITERAL; diff --git a/ppl/src/main/antlr/OpenDistroPPLParser.g4 b/ppl/src/main/antlr/OpenDistroPPLParser.g4 index c6ce372aba..1c3f7f87bc 100644 --- a/ppl/src/main/antlr/OpenDistroPPLParser.g4 +++ b/ppl/src/main/antlr/OpenDistroPPLParser.g4 @@ -237,12 +237,12 @@ dateAndTimeFunctionBase ; textFunctionBase - : + : SUBSTR | SUBSTRING | TRIM | LTRIM | RTRIM | LOWER | UPPER | CONCAT | CONCAT_WS | LENGTH | STRCMP ; /** operators */ comparisonOperator - : EQUAL | NOT_EQUAL | LESS | NOT_LESS | GREATER | NOT_GREATER | LIKE + : EQUAL | NOT_EQUAL | LESS | NOT_LESS | GREATER | NOT_GREATER | LIKE | REGEXP ; binaryOperator diff --git a/sql/src/main/antlr/OpenDistroSQLLexer.g4 b/sql/src/main/antlr/OpenDistroSQLLexer.g4 index ca53d4b437..ba6f9d9998 100644 --- a/sql/src/main/antlr/OpenDistroSQLLexer.g4 +++ b/sql/src/main/antlr/OpenDistroSQLLexer.g4 @@ -267,10 +267,13 @@ WEEK_OF_YEAR: 'WEEK_OF_YEAR'; WILDCARDQUERY: 'WILDCARDQUERY'; WILDCARD_QUERY: 'WILDCARD_QUERY'; 
+// TEXT FUNCTIONS +SUBSTR: 'SUBSTR'; +STRCMP: 'STRCMP'; + // DATE AND TIME FUNCTIONS ADDDATE: 'ADDDATE'; - // Operators // Operators. Arithmetics diff --git a/sql/src/main/antlr/OpenDistroSQLParser.g4 b/sql/src/main/antlr/OpenDistroSQLParser.g4 index 10ffe8d4c4..dbc8d93a7d 100644 --- a/sql/src/main/antlr/OpenDistroSQLParser.g4 +++ b/sql/src/main/antlr/OpenDistroSQLParser.g4 @@ -181,6 +181,7 @@ predicate | left=predicate comparisonOperator right=predicate #binaryComparisonPredicate | predicate IS nullNotnull #isNullPredicate | left=predicate NOT? LIKE right=predicate #likePredicate + | left=predicate REGEXP right=predicate #regexpPredicate ; expressionAtom @@ -212,6 +213,7 @@ functionCall scalarFunctionName : mathematicalFunctionName | dateTimeFunctionName + | textFunctionName ; aggregateFunction @@ -233,6 +235,11 @@ dateTimeFunctionName : DAYOFMONTH | DATE | TIME | TIMESTAMP | ADDDATE ; +textFunctionName + : SUBSTR | SUBSTRING | TRIM | LTRIM | RTRIM | LOWER | UPPER + | CONCAT | CONCAT_WS | LENGTH | STRCMP + ; + functionArgs : functionArg (COMMA functionArg)* ; diff --git a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java index 5600cfe022..43530eaf94 100644 --- a/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilder.java @@ -20,6 +20,7 @@ import static com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionName.LIKE; import static com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionName.NOT_LIKE; +import static com.amazon.opendistroforelasticsearch.sql.expression.function.BuiltinFunctionName.REGEXP; import static 
com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.AggregateFunctionContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.BinaryComparisonPredicateContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.BooleanContext; @@ -29,6 +30,7 @@ import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.MathExpressionAtomContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.NotExpressionContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.NullLiteralContext; +import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.RegexpPredicateContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.ScalarFunctionCallContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SignedDecimalContext; import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SignedRealContext; @@ -138,6 +140,12 @@ public UnresolvedExpression visitLikePredicate(LikePredicateContext ctx) { Arrays.asList(visit(ctx.left), visit(ctx.right))); } + @Override + public UnresolvedExpression visitRegexpPredicate(RegexpPredicateContext ctx) { + return new Function(REGEXP.getName().getFunctionName(), + Arrays.asList(visit(ctx.left), visit(ctx.right))); + } + @Override public UnresolvedExpression visitAndExpression(AndExpressionContext ctx) { return new And(visit(ctx.left), visit(ctx.right)); diff --git a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilderTest.java index 38a95e8f53..a7401c0415 100644 --- 
a/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -216,6 +216,14 @@ public void canBuildLikeExpression() { ); } + @Test + public void canBuildRegexpExpression() { + assertEquals( + function("regexp", stringLiteral("str"), stringLiteral(".*")), + buildExprAst("'str' regexp '.*'") + ); + } + @Test public void canBuildLogicalExpression() { assertEquals(