Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Expression pushdown optimization #663

Merged
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5198a38
Test serializer
dai-chen Jul 20, 2020
75005eb
Merge branch 'develop' into expression-pushdown-optimization
dai-chen Jul 30, 2020
705397b
Add JDK serializer impl
dai-chen Jul 31, 2020
defdd08
Add UT
dai-chen Jul 31, 2020
6a51c26
Support boolean literal
dai-chen Jul 31, 2020
24a4423
Add UT for comparison expression
dai-chen Jul 31, 2020
2283de1
Add UT for function expression
dai-chen Jul 31, 2020
bd37727
Add UT for multiple fields
dai-chen Jul 31, 2020
8e4364b
Use expr value factory
dai-chen Aug 3, 2020
5e42d86
Add comments
dai-chen Aug 3, 2020
1614040
Merge branch 'develop' into expression-pushdown-optimization
dai-chen Aug 3, 2020
cca0479
Test coverage
dai-chen Aug 4, 2020
de03a58
Refactor package and class
dai-chen Aug 4, 2020
f77a739
Add UT for script factory and leaf factory
dai-chen Aug 4, 2020
c95c2e1
Add support for date doc value
dai-chen Aug 5, 2020
3193fdc
More UT
dai-chen Aug 5, 2020
f29b402
Refactor edge case UT
dai-chen Aug 5, 2020
8bb9405
Add UT for value factory
dai-chen Aug 5, 2020
0c0b0d5
Add UT for serializer error case
dai-chen Aug 5, 2020
45bf93d
Fix checkstyle
dai-chen Aug 5, 2020
8ccd384
Add expression visitor and UT
dai-chen Aug 5, 2020
4d0c97b
Use expression visitor in script engine
dai-chen Aug 5, 2020
f839f8f
Add UT for visitor
dai-chen Aug 5, 2020
0bac7b5
Push down query
dai-chen Aug 5, 2020
da2ece3
Handle text keyword
dai-chen Aug 6, 2020
81acd11
Cast long/double doc value to int/float
dai-chen Aug 6, 2020
0ddcab4
Don't push down if illegal state exception thrown
dai-chen Aug 6, 2020
6d259fc
Fix broken IT due to field type change
dai-chen Aug 6, 2020
154358a
Prepare PR
dai-chen Aug 6, 2020
f6eafcb
Address PR comments
dai-chen Aug 7, 2020
af8beb3
Address PR comments
dai-chen Aug 8, 2020
5952930
Merge branch 'develop' into expression-pushdown-optimization
dai-chen Aug 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use expr value factory
  • Loading branch information
dai-chen committed Aug 3, 2020
commit 8e4364b6ff3fab6c0f9450637e9941714920c48d
Original file line number Diff line number Diff line change
@@ -95,17 +95,17 @@ private ExprValue construct(String field, JsonNode value) {

ExprType type = type(field);
if (type.equals(INTEGER)) {
return constructInteger(value);
return constructInteger(value.intValue());
} else if (type.equals(LONG)) {
return constructLong(value);
return constructLong(value.longValue());
} else if (type.equals(FLOAT)) {
return constructFloat(value);
return constructFloat(value.floatValue());
} else if (type.equals(DOUBLE)) {
return constructDouble(value);
return constructDouble(value.doubleValue());
} else if (type.equals(STRING)) {
return constructString(value);
return constructString(value.textValue());
} else if (type.equals(BOOLEAN)) {
return constructBoolean(value);
return constructBoolean(value.booleanValue());
} else if (type.equals(STRUCT)) {
return constructStruct(value, field);
} else if (type.equals(ARRAY)) {
@@ -121,6 +121,35 @@ private ExprValue construct(String field, JsonNode value) {
}
}

/**
* Construct ExprValue from field and its value object.
* @param field field name
* @param value value object
* @return ExprValue
*/
public ExprValue construct(String field, Object value) {
ExprType type = type(field);
if (type.equals(INTEGER)) {
return constructInteger((Integer) value);
} else if (type.equals(LONG)) {
return constructLong((Long) value);
} else if (type.equals(FLOAT)) {
return constructFloat((Float) value);
} else if (type.equals(DOUBLE)) {
return constructDouble((Double) value);
} else if (type.equals(STRING)) {
return constructString((String) value);
} else if (type.equals(BOOLEAN)) {
return constructBoolean((Boolean) value);
} else if (type.equals(ES_TEXT)) {
return new ElasticsearchExprTextValue((String) value);
} else {
throw new IllegalStateException(String.format(
"Doesn't support construct expression value from object: "
+ "%s for field: %s, value: %s.", type.typeName(), field, value));
}
}

private ExprType type(String field) {
if (typeMapping.containsKey(field)) {
return typeMapping.get(field);
@@ -129,28 +158,28 @@ private ExprType type(String field) {
}
}

private ExprIntegerValue constructInteger(JsonNode value) {
return new ExprIntegerValue(value.intValue());
private ExprIntegerValue constructInteger(Integer value) {
return new ExprIntegerValue(value);
}

private ExprLongValue constructLong(JsonNode value) {
return new ExprLongValue(value.longValue());
private ExprLongValue constructLong(Long value) {
return new ExprLongValue(value);
}

private ExprFloatValue constructFloat(JsonNode value) {
return new ExprFloatValue(value.floatValue());
private ExprFloatValue constructFloat(Float value) {
return new ExprFloatValue(value);
}

private ExprDoubleValue constructDouble(JsonNode value) {
return new ExprDoubleValue(value.doubleValue());
private ExprDoubleValue constructDouble(Double value) {
return new ExprDoubleValue(value);
}

private ExprStringValue constructString(JsonNode value) {
return new ExprStringValue(value.textValue());
private ExprStringValue constructString(String value) {
return new ExprStringValue(value);
}

private ExprBooleanValue constructBoolean(JsonNode value) {
return ExprBooleanValue.of(value.booleanValue());
private ExprBooleanValue constructBoolean(Boolean value) {
return ExprBooleanValue.of(value);
}

/**
Original file line number Diff line number Diff line change
@@ -16,9 +16,11 @@

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprTupleValue;
import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValueUtils;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.data.value.ElasticsearchExprValueFactory;
import com.amazon.opendistroforelasticsearch.sql.expression.Expression;
import com.amazon.opendistroforelasticsearch.sql.expression.FunctionExpression;
import com.amazon.opendistroforelasticsearch.sql.expression.ReferenceExpression;
@@ -28,6 +30,7 @@
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.index.fielddata.ScriptDocValues;
@@ -56,44 +59,58 @@ public ExpressionScript(Expression expression,
@Override
public boolean execute() {
// Check we ourselves are not being called by unprivileged code.
SpecialPermission.check();
SecurityManager sm = System.getSecurityManager();
if (sm != null) {
SpecialPermission.check();
}

return AccessController.doPrivileged((PrivilegedAction<Boolean>) () -> {

// 1) getDoc() is not iterable;
// 2) Doc value is array; 3) Get text field ends up with exception
Set<String> fieldNames = extractInputFieldNames();
Map<String, Object> values = extractFieldNameAndValues(fieldNames);
ExprValue result = evaluateExpression(values);
Set<ReferenceExpression> fields = extractInputFields();
ElasticsearchExprValueFactory valueFactory = buildValueFactory(fields);
Map<String, ExprValue> valueEnv = buildValueEnv(fields, valueFactory);
ExprValue result = evaluateExpression(valueEnv);
return (Boolean) result.value();
});
}

private Set<String> extractInputFieldNames() {
Set<String> fieldNames = new HashSet<>();
doExtractInputFieldNames(expression, fieldNames);
return fieldNames;
private Set<ReferenceExpression> extractInputFields() {
Set<ReferenceExpression> fields = new HashSet<>();
doExtractInputFields(expression, fields);
return fields;
}

private void doExtractInputFieldNames(Expression expr, Set<String> fieldNames) {
if (expr instanceof FunctionExpression) { // Assume only function input arguments is recursive
private void doExtractInputFields(Expression expr, Set<ReferenceExpression> fields) {
if (expr instanceof FunctionExpression) {
FunctionExpression func = (FunctionExpression) expr;
func.getArguments().forEach(argExpr -> doExtractInputFieldNames(argExpr, fieldNames));
func.getArguments().forEach(argExpr -> doExtractInputFields(argExpr, fields));
} else if (expr instanceof ReferenceExpression) {
ReferenceExpression ref = (ReferenceExpression) expr;
fieldNames.add(ref.getAttr());
}
fields.add(ref);
} // else ignore other expressions, ex. literal, aggregator etc.
}

private Map<String, Object> extractFieldNameAndValues(Set<String> fieldNames) {
Map<String, Object> values = new HashMap<>();
for (String fieldName : fieldNames) {
private ElasticsearchExprValueFactory buildValueFactory(Set<ReferenceExpression> fields) {
Map<String, ExprType> typeEnv = fields.stream()
.collect(Collectors.toMap(
ReferenceExpression::getAttr,
ReferenceExpression::type));
return new ElasticsearchExprValueFactory(typeEnv);
}

private Map<String, ExprValue> buildValueEnv(Set<ReferenceExpression> fields,
ElasticsearchExprValueFactory valueFactory) {
Map<String, ExprValue> valueEnv = new HashMap<>();
for (ReferenceExpression field : fields) {
String fieldName = field.getAttr();
ScriptDocValues<?> value = extractFieldValue(fieldName);
if (value != null && !value.isEmpty()) {
values.put(fieldName, value.get(0));
valueEnv.put(fieldName, valueFactory.construct(fieldName, value.get(0)));
}
}
return values;
return valueEnv;
}

private ScriptDocValues<?> extractFieldValue(String fieldName) {
@@ -109,13 +126,14 @@ private ScriptDocValues<?> extractFieldValue(String fieldName) {
return value;
}

private ExprValue evaluateExpression(Map<String, Object> values) {
ExprValue tupleValue = ExprValueUtils.tupleValue(values);
private ExprValue evaluateExpression(Map<String, ExprValue> valueEnv) {
ExprTupleValue tupleValue = ExprTupleValue.fromExprValueMap(valueEnv);
ExprValue result = expression.valueOf(tupleValue.bindingTuples());

if (result.type() != ExprCoreType.BOOLEAN) {
throw new IllegalStateException("Expression has wrong result type: " + result);
}
return result;
}

}
Original file line number Diff line number Diff line change
@@ -82,8 +82,7 @@ void should_match_doc_if_true_comparison_expression() {
assertThat()
.docValues("age", 30)
.filterBy(
dsl.greater(
ref("age", INTEGER), literal(20)))
dsl.greater(ref("age", INTEGER), literal(20)))
.shouldMatch();
}

@@ -105,10 +104,8 @@ void can_execute_expression_script_with_multiple_fields_involved() {
"name", "John")
.filterBy(
dsl.and(
dsl.less(
ref("age", INTEGER), literal(50)),
dsl.equal(
ref("name", STRING), literal("John"))))
dsl.less(ref("age", INTEGER), literal(50)),
dsl.equal(ref("name", STRING), literal("John"))))
.shouldMatch();
}

@@ -134,7 +131,8 @@ ExprScriptAssertion docValues(String name, Object value) {
return this;
}

ExprScriptAssertion docValues(String name1, Object value1, String name2, Object value2) {
ExprScriptAssertion docValues(String name1, Object value1,
String name2, Object value2) {
LeafDocLookup leafDocLookup = mockLeafDocLookup(
ImmutableMap.of(
name1, toDocValue(value1),
@@ -158,7 +156,7 @@ void shouldNotMatch() {
Assertions.assertFalse(isMatched);
}

private ScriptDocValues<?> toDocValue(Object object) {
private static ScriptDocValues<?> toDocValue(Object object) {
if (object instanceof Integer) {
return new FakeScriptDocValues<>((Integer) object);
} else if (object instanceof Long) {
@@ -174,15 +172,15 @@ private ScriptDocValues<?> toDocValue(Object object) {
}
}

private LeafDocLookup mockLeafDocLookup(Map<String, ScriptDocValues<?>> docValueByNames) {
private static LeafDocLookup mockLeafDocLookup(
Map<String, ScriptDocValues<?>> docValueByNames) {
LeafDocLookup leafDocLookup = mock(LeafDocLookup.class);
when(leafDocLookup.containsKey(anyString()))
.thenAnswer(invocation -> docValueByNames.containsKey(invocation.<String>getArgument(0)));
when(leafDocLookup.get(anyString()))
.thenAnswer(invocation -> docValueByNames.get(invocation.<String>getArgument(0)));
return leafDocLookup;
}

}

@RequiredArgsConstructor
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@

import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.ExpressionScriptEngine;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.setting.ElasticsearchSettings;
import com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.serialization.DefaultExpressionSerializer;
import com.amazon.opendistroforelasticsearch.sql.legacy.esdomain.LocalClusterState;
import com.amazon.opendistroforelasticsearch.sql.legacy.executor.AsyncRestExecutor;
import com.amazon.opendistroforelasticsearch.sql.legacy.metrics.Metrics;
@@ -150,7 +151,7 @@ public List<Setting<?>> getSettings() {

@Override
public ScriptEngine getScriptEngine(Settings settings, Collection<ScriptContext<?>> contexts) {
return new ExpressionScriptEngine();
return new ExpressionScriptEngine(new DefaultExpressionSerializer());
}

}