Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Support LIKE operator #534

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ public FunctionExpression gte(
repository.compile(BuiltinFunctionName.GTE.getName(), Arrays.asList(expressions), env);
}

/**
 * Compiles the built-in LIKE function for the given operands.
 *
 * @param env         type environment handed to the function repository.
 * @param expressions operand expressions, passed to the repository as a list.
 * @return the compiled expression, cast to {@link FunctionExpression}.
 */
public FunctionExpression like(
    Environment<Expression, ExprType> env, Expression... expressions) {
  final Object compiled =
      repository.compile(BuiltinFunctionName.LIKE.getName(), Arrays.asList(expressions), env);
  return (FunctionExpression) compiled;
}

public Aggregator avg(Environment<Expression, ExprType> env, Expression... expressions) {
return (Aggregator)
repository.compile(BuiltinFunctionName.AVG.getName(), Arrays.asList(expressions), env);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,69 @@ public String toString() {
*/
public static final BiPredicate<ExprValue, ExprValue> COMPARE_WITH_NULL_OR_MISSING =
(left, right) -> left.isMissing() || right.isMissing() || left.isNull() || right.isNull();

/**
* Wildcard pattern matcher util.
* Percent (%) character for wildcard,
* Underscore (_) character for a single character match.
* @param pattern string pattern to match.
* @return if text matches pattern returns true; else return false.
*/
public static boolean matches(String text, String pattern) {
return Pattern.compile(patternToRegex(pattern)).matcher(text).matches();
dai-chen marked this conversation as resolved.
Show resolved Hide resolved
}

private static final char DEFAULT_ESCAPE = '\\';

private static String patternToRegex(String patternString) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. could you seperate it from OperatorUtils in case is more focus on SQL regex to Java regex.
  2. by define the ESCAPE is defined in SQL language, how do we support it?
  3. is it possible to seperate the translation rule from translation logic.
    for example,
    TranslationRule.Buidler.put("//%", %").put("%", ".*").build()
    Then, apply rule on the input REGEX.

StringBuilder regex = new StringBuilder(patternString.length() * 2);
regex.append('^');
boolean escaped = false;
for (char currentChar : patternString.toCharArray()) {
if (!escaped && currentChar == DEFAULT_ESCAPE) {
escaped = true;
} else {
switch (currentChar) {
case '%':
if (escaped) {
regex.append("%");
} else {
regex.append(".*");
}
escaped = false;
break;
case '_':
if (escaped) {
regex.append("_");
} else {
regex.append('.');
}
escaped = false;
break;
default:
switch (currentChar) {
case '\\':
case '^':
case '$':
case '.':
case '*':
case '[':
case ']':
case '(':
case ')':
case '|':
case '+':
regex.append('\\');
break;
default:
}

regex.append(currentChar);
escaped = false;
}
}
}
regex.append('$');
return regex.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionName;
import com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionResolver;
import com.amazon.opendistroforelasticsearch.sql.expression.function.FunctionSignature;
import com.amazon.opendistroforelasticsearch.sql.expression.operator.OperatorUtils;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Table;
Expand Down Expand Up @@ -66,6 +67,7 @@ public static void register(BuiltinFunctionRepository repository) {
repository.register(lte());
repository.register(greater());
repository.register(gte());
repository.register(like());
}

/**
Expand Down Expand Up @@ -184,6 +186,21 @@ public static void register(BuiltinFunctionRepository repository) {
.put(LITERAL_MISSING, LITERAL_MISSING, LITERAL_FALSE)
.build();

/**
 * Results of the like operator for the NULL / MISSING operand pairs listed below.
 * <pre>
 * A        B        A like B
 * NULL     NULL     NULL
 * NULL     MISSING  MISSING
 * MISSING  NULL     MISSING
 * MISSING  MISSING  MISSING
 * </pre>
 */
private static Table<ExprValue, ExprValue, ExprValue> likeTable =
    ImmutableTable.<ExprValue, ExprValue, ExprValue>builder()
        .put(LITERAL_NULL, LITERAL_NULL, LITERAL_NULL)
        .put(LITERAL_NULL, LITERAL_MISSING, LITERAL_MISSING)
        .put(LITERAL_MISSING, LITERAL_NULL, LITERAL_MISSING)
        .put(LITERAL_MISSING, LITERAL_MISSING, LITERAL_MISSING)
        .build();

private static FunctionResolver and() {
FunctionName functionName = BuiltinFunctionName.AND.getName();
Expand Down Expand Up @@ -309,6 +326,16 @@ private static FunctionResolver gte() {
);
}

/**
 * Creates the resolver for the LIKE function, backed by {@code OperatorUtils::matches}.
 */
private static FunctionResolver like() {
  final FunctionName likeName = BuiltinFunctionName.LIKE.getName();
  return new FunctionResolver(likeName, likePredicate(likeName, OperatorUtils::matches));
}

private static Map<FunctionSignature, FunctionBuilder> predicate(
FunctionName functionName,
Table<ExprValue, ExprValue, ExprValue> table,
Expand Down Expand Up @@ -443,6 +470,17 @@ public String toString() {
};
}

/**
 * Builds the signature map for a like-style predicate: a single
 * (STRING, STRING) signature whose result type is BOOLEAN.
 *
 * @param functionName name the signature is registered under.
 * @param stringFunc   function applied to the two string operands.
 * @return immutable map holding the one signature and its function builder.
 */
private static Map<FunctionSignature, FunctionBuilder> likePredicate(
    FunctionName functionName,
    BiFunction<String, String, Boolean> stringFunc) {
  final FunctionSignature stringPair =
      new FunctionSignature(functionName, Arrays.asList(ExprType.STRING, ExprType.STRING));
  final FunctionBuilder stringPairBuilder =
      likePattern(functionName, stringFunc, ExprValueUtils::getStringValue, ExprType.BOOLEAN);
  return ImmutableMap.of(stringPair, stringPairBuilder);
}

/**
* Building method for operators including.
* less than (<) operator
Expand Down Expand Up @@ -475,4 +513,34 @@ public String toString() {
}
};
}

private static <T, R> FunctionBuilder likePattern(FunctionName functionName,
BiFunction<T, T, R> function,
Function<ExprValue, T> observer,
ExprType returnType) {
dai-chen marked this conversation as resolved.
Show resolved Hide resolved
return arguments -> new FunctionExpression(functionName, arguments) {
@Override
public ExprValue valueOf(Environment<Expression, ExprValue> env) {
ExprValue arg1 = arguments.get(0).valueOf(env);
ExprValue arg2 = arguments.get(1).valueOf(env);
if (likeTable.contains(arg1, arg2)) {
return likeTable.get(arg1, arg2);
} else {
return ExprValueUtils.fromObjectValue(
function.apply(observer.apply(arg1), observer.apply(arg2)));
}
}

@Override
public ExprType type(Environment<Expression, ExprType> typeEnv) {
return returnType;
}

@Override
public String toString() {
return String.format("%s %s %s", arguments.get(0).toString(), functionName, arguments
.get(1).toString());
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ public class TestConfig {
public static final String INT_TYPE_MISSING_VALUE_FIELD = "int_missing_value";
public static final String BOOL_TYPE_NULL_VALUE_FIELD = "null_value_boolean";
public static final String BOOL_TYPE_MISSING_VALUE_FIELD = "missing_value_boolean";
public static final String STRING_TYPE_NULL_VALUE_FILED = "string_null_value";
public static final String STRING_TYPE_MISSING_VALUE_FILED = "string_missing_value";

private static Map<String, ExprType> typeMapping = new ImmutableMap.Builder<String, ExprType>()
.put("integer_value", ExprType.INTEGER)
Expand All @@ -53,6 +55,8 @@ public class TestConfig {
.put(BOOL_TYPE_NULL_VALUE_FIELD, ExprType.BOOLEAN)
.put(BOOL_TYPE_MISSING_VALUE_FIELD, ExprType.BOOLEAN)
.put("string_value", ExprType.STRING)
.put(STRING_TYPE_NULL_VALUE_FILED, ExprType.STRING)
.put(STRING_TYPE_MISSING_VALUE_FILED, ExprType.STRING)
.put("struct_value", ExprType.STRUCT)
.put("array_value", ExprType.ARRAY)
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.BOOL_TYPE_NULL_VALUE_FIELD;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.INT_TYPE_MISSING_VALUE_FIELD;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.INT_TYPE_NULL_VALUE_FIELD;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.STRING_TYPE_MISSING_VALUE_FILED;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.STRING_TYPE_NULL_VALUE_FILED;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.booleanValue;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.collectionValue;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.doubleValue;
Expand Down Expand Up @@ -82,9 +84,11 @@ protected Environment<Expression, ExprValue> valueEnv() {
return collectionValue(ImmutableList.of(1));
case BOOL_TYPE_NULL_VALUE_FIELD:
case INT_TYPE_NULL_VALUE_FIELD:
case STRING_TYPE_NULL_VALUE_FILED:
return nullValue();
case INT_TYPE_MISSING_VALUE_FIELD:
case BOOL_TYPE_MISSING_VALUE_FIELD:
case STRING_TYPE_MISSING_VALUE_FILED:
return missingValue();
default:
throw new IllegalArgumentException("undefined reference");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.BOOL_TYPE_NULL_VALUE_FIELD;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.INT_TYPE_MISSING_VALUE_FIELD;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.INT_TYPE_NULL_VALUE_FIELD;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.STRING_TYPE_MISSING_VALUE_FILED;
import static com.amazon.opendistroforelasticsearch.sql.config.TestConfig.STRING_TYPE_NULL_VALUE_FILED;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_MISSING;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.booleanValue;
import static com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils.fromObjectValue;
import static com.amazon.opendistroforelasticsearch.sql.expression.operator.OperatorUtils.matches;
import static com.amazon.opendistroforelasticsearch.sql.utils.ComparisonUtil.compare;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand Down Expand Up @@ -99,6 +102,21 @@ private static Stream<Arguments> testCompareValueArguments() {
return builder.build();
}

/**
 * Argument source for {@code test_like}: each entry is a (text, pattern) pair,
 * converted to expression values in declaration order.
 */
private static Stream<Arguments> testLikeArguments() {
  final String[][] textAndPattern = {
      {"foo", "foo"},
      {"notFoo", "foo"},
      {"foobar", "%bar"},
      {"bar", "%bar"},
      {"foo", "fo_"},
      {"foo", "foo_"},
      {"foorbar", "%o_ar"},
      {"foobar", "%o_a%"},
      {"fooba%_\\^$.*[]()|+r", "%\\%\\_\\\\\\^\\$\\.\\*\\[\\]\\(\\)\\|\\+_"}
  };
  return Arrays.stream(textAndPattern)
      .map(pair -> Arguments.of(fromObjectValue(pair[0]), fromObjectValue(pair[1])));
}

@ParameterizedTest(name = "and({0}, {1})")
@MethodSource("binaryPredicateArguments")
public void test_and(Boolean v1, Boolean v2) {
Expand Down Expand Up @@ -524,4 +542,37 @@ public void test_gte_missing() {
assertThrows(ExpressionEvaluationException.class,
() -> gte.valueOf(valueEnv()), "invalid to call type operation on missing value");
}

/**
 * Checks type, value and string form of the like expression for literal operands.
 * NOTE(review): the expected value is computed with the same {@code matches} helper that
 * the like operator delegates to, so this verifies wiring rather than matching semantics.
 */
@ParameterizedTest(name = "like({0}, {1})")
@MethodSource("testLikeArguments")
public void test_like(ExprValue v1, ExprValue v2) {
  final FunctionExpression like = dsl.like(typeEnv(), DSL.literal(v1), DSL.literal(v2));

  assertEquals(ExprType.BOOLEAN, like.type(typeEnv()));

  final String text = (String) v1.value();
  final String pattern = (String) v2.value();
  assertEquals(matches(text, pattern), ExprValueUtils.getBooleanValue(like.valueOf(valueEnv())));

  final String expectedText = String.format("%s like %s", v1.toString(), v2.toString());
  assertEquals(expectedText, like.toString());
}

/**
 * Covers the four NULL / MISSING operand combinations of the like operator:
 * NULL like NULL yields NULL, every combination involving MISSING yields MISSING.
 */
@Test
public void test_null_like_missing() {
  final FunctionExpression nullLikeNull = dsl.like(typeEnv(),
      DSL.ref(STRING_TYPE_NULL_VALUE_FILED), DSL.ref(STRING_TYPE_NULL_VALUE_FILED));
  assertEquals(ExprType.BOOLEAN, nullLikeNull.type(typeEnv()));
  assertEquals(LITERAL_NULL, nullLikeNull.valueOf(valueEnv()));

  final FunctionExpression missingLikeMissing = dsl.like(typeEnv(),
      DSL.ref(STRING_TYPE_MISSING_VALUE_FILED), DSL.ref(STRING_TYPE_MISSING_VALUE_FILED));
  assertEquals(ExprType.BOOLEAN, missingLikeMissing.type(typeEnv()));
  assertEquals(LITERAL_MISSING, missingLikeMissing.valueOf(valueEnv()));

  final FunctionExpression nullLikeMissing = dsl.like(typeEnv(),
      DSL.ref(STRING_TYPE_NULL_VALUE_FILED), DSL.ref(STRING_TYPE_MISSING_VALUE_FILED));
  assertEquals(ExprType.BOOLEAN, nullLikeMissing.type(typeEnv()));
  assertEquals(LITERAL_MISSING, nullLikeMissing.valueOf(valueEnv()));

  final FunctionExpression missingLikeNull = dsl.like(typeEnv(),
      DSL.ref(STRING_TYPE_MISSING_VALUE_FILED), DSL.ref(STRING_TYPE_NULL_VALUE_FILED));
  assertEquals(ExprType.BOOLEAN, missingLikeNull.type(typeEnv()));
  assertEquals(LITERAL_MISSING, missingLikeNull.valueOf(valueEnv()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,14 @@ public void testGteOperator() throws IOException {
verifyDataRows(result, rows(36), rows(36), rows(39));
}

/**
 * Runs a like filter with a single-character wildcard against the bank index
 * and expects exactly the row "Hattie".
 */
@Test
public void testLikeOperator() throws IOException {
  final String query =
      String.format("source=%s firstname like 'Hatti_' | fields firstname", TEST_INDEX_BANK);
  final JSONObject result = executeQuery(query);
  verifyDataRows(result, rows("Hattie"));
}

@Test
public void testBinaryPredicateWithNullValue() {
queryExecutionShouldThrowExceptionDueToNullOrMissingValue(
Expand Down
1 change: 1 addition & 0 deletions ppl/src/main/antlr/OpenDistroPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ AND: 'AND';
XOR: 'XOR';
TRUE: 'TRUE';
FALSE: 'FALSE';
LIKE: 'LIKE';

// DATASET TYPES
DATAMODEL: 'DATAMODEL';
Expand Down
2 changes: 1 addition & 1 deletion ppl/src/main/antlr/OpenDistroPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ textFunctionBase

/** operators */
comparisonOperator
: EQUAL | NOT_EQUAL | LESS | NOT_LESS | GREATER | NOT_GREATER
: EQUAL | NOT_EQUAL | LESS | NOT_LESS | GREATER | NOT_GREATER | LIKE
;

binaryOperator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,15 @@ public void testLogicalXorExpr() {
));
}

/**
 * A like comparison in the search command is expected to parse into a filter
 * over the relation with a "like" compare node on the field and the string literal.
 */
@Test
public void testLogicalLikeExpr() {
  final String query = "source=t a like '_a%b%c_d_'";
  assertEqual(
      query,
      filter(relation("t"), compare("like", field("a"), stringLiteral("_a%b%c_d_"))));
}

/**
* Todo. search operator should not include functionCall, need to change antlr.
*/
Expand Down