Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Support WHERE clause in new SQL parser #682

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene;

import static com.amazon.opendistroforelasticsearch.sql.elasticsearch.data.type.ElasticsearchDataType.ES_TEXT_KEYWORD;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import com.amazon.opendistroforelasticsearch.sql.expression.FunctionExpression;
Expand Down Expand Up @@ -60,13 +62,28 @@ public QueryBuilder build(FunctionExpression func) {
* from reference and literal in function arguments.
*
* @param fieldName field name
* @param fieldType expr fieldType
* @param literal expr literal
* @param fieldType field type
* @param literal field value literal
* @return query
*/
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
throw new UnsupportedOperationException(
"Subclass doesn't implement this and build method either");
}

/**
* Convert multi-field text field name to its inner keyword field. The limitation and assumption
* is that the keyword field name is always "keyword" which is true by default.
*
* @param fieldName field name
* @param fieldType field type
* @return keyword field name for multi-field, otherwise original field name returned
*/
protected String convertTextToKeyword(String fieldName, ExprType fieldType) {
if (fieldType == ES_TEXT_KEYWORD) {
return fieldName + ".keyword";
}
return fieldName;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

package com.amazon.opendistroforelasticsearch.sql.elasticsearch.storage.script.filter.lucene;

import static com.amazon.opendistroforelasticsearch.sql.elasticsearch.data.type.ElasticsearchDataType.ES_TEXT_KEYWORD;

import com.amazon.opendistroforelasticsearch.sql.data.model.ExprValue;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -30,9 +28,7 @@ public class TermQuery extends LuceneQuery {

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
if (fieldType == ES_TEXT_KEYWORD) { // Assume inner field name is always "keyword"
fieldName += ".keyword";
}
fieldName = convertTextToKeyword(fieldName, fieldType);
return QueryBuilders.termQuery(fieldName, literal.value());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class WildcardQuery extends LuceneQuery {

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
fieldName = convertTextToKeyword(fieldName, fieldType);
String matchText = convertSqlWildcardToLucene(literal.stringValue());
return QueryBuilders.wildcardQuery(fieldName, matchText);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ void should_build_script_query_for_comparison_between_fields() {
@Test
void should_build_bool_query_for_and_or_expression() {
String[] names = { "filter", "should" };
FunctionExpression expr1 = dsl.equal(ref("name", ES_TEXT_KEYWORD), literal("John"));
FunctionExpression expr1 = dsl.equal(ref("name", STRING), literal("John"));
FunctionExpression expr2 = dsl.equal(ref("age", INTEGER), literal(30));
Expression[] exprs = {
dsl.and(expr1, expr2),
Expand All @@ -185,7 +185,7 @@ void should_build_bool_query_for_and_or_expression() {
+ " \"" + names[i] + "\" : [\n"
+ " {\n"
+ " \"term\" : {\n"
+ " \"name.keyword\" : {\n"
+ " \"name\" : {\n"
+ " \"value\" : \"John\",\n"
+ " \"boost\" : 1.0\n"
+ " }\n"
Expand Down Expand Up @@ -233,6 +233,38 @@ void should_build_bool_query_for_not_expression() {
ref("age", INTEGER), literal(30)))));
}

@Test
void should_use_keyword_for_multi_field_in_equality_expression() {
assertEquals(
"{\n"
+ " \"term\" : {\n"
+ " \"name.keyword\" : {\n"
+ " \"value\" : \"John\",\n"
+ " \"boost\" : 1.0\n"
+ " }\n"
+ " }\n"
+ "}",
buildQuery(
dsl.equal(
ref("name", ES_TEXT_KEYWORD), literal("John"))));
}

@Test
void should_use_keyword_for_multi_field_in_like_expression() {
assertEquals(
"{\n"
+ " \"wildcard\" : {\n"
+ " \"name.keyword\" : {\n"
+ " \"wildcard\" : \"John*\",\n"
+ " \"boost\" : 1.0\n"
+ " }\n"
+ " }\n"
+ "}",
buildQuery(
dsl.like(
ref("name", ES_TEXT_KEYWORD), literal("John%"))));
}

private String buildQuery(Expression expr) {
return filterQueryBuilder.build(expr).toString();
}
Expand Down
3 changes: 3 additions & 0 deletions integ-test/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ task integTestWithNewEngine(type: RestIntegTestTask) {

exclude 'com/amazon/opendistroforelasticsearch/sql/doctest/**/*IT.class'
exclude 'com/amazon/opendistroforelasticsearch/sql/correctness/**'

// Skip old semantic analyzer IT because analyzer in new engine has different behavior
exclude 'com/amazon/opendistroforelasticsearch/sql/legacy/QueryAnalysisIT.class'
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class SQLCorrectnessIT extends CorrectnessTestBase {

private static final String ROOT_DIR = "correctness/";
private static final String[] EXPR_TEST_DIR = { "expressions" };
private static final String[] QUERY_TEST_DIR = { "queries"/*, "bugfixes"*/ }; //TODO: skip bugfixes folder for now since it fails
private static final String[] QUERY_TEST_DIR = { "queries", "bugfixes" };

@Override
protected void init() throws Exception {
Expand Down Expand Up @@ -71,6 +71,4 @@ private void verifyQueries(Path file, Function<String, String> converter) {
}
}



}
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/234.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT FlightNum FROM kibana_sample_data_flights where (AvgTicketPrice + 100) <= 1000
SELECT FlightNum FROM kibana_sample_data_flights where ROUND(FlightTimeMin) > ABS(FlightDelayMin)
3 changes: 3 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/237.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SELECT ((Origin = 'Munich Airport') AND (Dest = 'Venice Marco Polo Airport')) AS Calculation_462181953506873347 FROM kibana_sample_data_flights
SELECT ((Origin = 'Munich Airport') OR (Origin = 'Itami Airport')) AS Calculation_462181953506873347 FROM kibana_sample_data_flights
SELECT NOT (Origin = 'Munich Airport') AS Calculation_462181953506873347 FROM kibana_sample_data_flights
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/368.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT Origin FROM kibana_sample_data_flights WHERE Origin LIKE 'London Hea%'
SELECT Origin FROM kibana_sample_data_flights WHERE Origin LIKE '%International%'
2 changes: 1 addition & 1 deletion integ-test/src/test/resources/correctness/bugfixes/521.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
SELECT timestamp, COUNT(*) FROM kibana_sample_data_flights GROUP BY timestamp
SELECT timestamp FROM kibana_sample_data_flights GROUP BY timestamp
4 changes: 4 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/690.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
SELECT FlightNum, Origin FROM kibana_sample_data_flights WHERE NULL IS NULL
SELECT FlightNum, Origin FROM kibana_sample_data_flights WHERE NULL IS NOT NULL
SELECT FlightNum, Origin FROM kibana_sample_data_flights WHERE NULL IS NULL AND NULL IS NULL
SELECT FlightNum, Origin FROM kibana_sample_data_flights WHERE NULL IS NULL OR NULL IS NULL
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
true AND true AS bool
false AND true AS bool
false OR false AS bool
true or false AS bool
NOT true AS bool
NOT false AS bool
NOT (true AND false) AS bool
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@
"type": "keyword"
},
"Dest": {
"type": "keyword"
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"DestAirportID": {
"type": "keyword"
Expand Down Expand Up @@ -53,7 +59,13 @@
"type": "float"
},
"Origin": {
"type": "keyword"
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OriginAirportID": {
"type": "keyword"
Expand Down
7 changes: 7 additions & 0 deletions integ-test/src/test/resources/correctness/queries/select.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,10 @@ SELECT abs(DistanceMiles), Abs(FlightDelayMin) FROM kibana_sample_data_flights
SELECT Cancelled AS Cancel, AvgTicketPrice AS ATP FROM kibana_sample_data_flights
SELECT Cancelled AS `Cancel`, AvgTicketPrice AS "ATP" FROM kibana_sample_data_flights
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPrice <= 500
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE NOT AvgTicketPrice <= 500
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPrice <= 500 AND FlightDelayMin = 0
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPrice <= 500 OR FlightDelayMin = 0
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPrice + 100 <= 500
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE ABS(AvgTicketPrice * -2) > 1000
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE 'JetBeat_'
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE '%Air%'
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ public void skipExplainThatNotSupport() {
@Test
public void skipQueryThatNotSupport() {
SQLQueryRequest request = new SQLQueryRequest(
new JSONObject("{\"query\": \"SELECT * FROM test WHERE age = 10\"}"),
"SELECT * FROM test WHERE age = 10",
new JSONObject("{\"query\": \"SELECT * FROM test WHERE age = 10 GROUP BY age\"}"),
"SELECT * FROM test WHERE age = 10 GROUP BY age",
QUERY_API_ENDPOINT,
"");

Expand Down
9 changes: 8 additions & 1 deletion sql/src/main/antlr/OpenDistroSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,12 @@ selectElement

fromClause
: FROM tableName
(whereClause)?
;

whereClause
: WHERE expression
;

// Literals

Expand Down Expand Up @@ -142,7 +146,10 @@ timestampLiteral

// Simplified approach for expression
expression
: predicate #predicateExpression
: NOT expression #notExpression
| left=expression AND right=expression #andExpression
| left=expression OR right=expression #orExpression
| predicate #predicateExpression
;

predicate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectClauseContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectElementContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SimpleSelectContext;
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.WhereClauseContext;

import com.amazon.opendistroforelasticsearch.sql.ast.expression.Alias;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.AllFields;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression;
import com.amazon.opendistroforelasticsearch.sql.ast.tree.Filter;
import com.amazon.opendistroforelasticsearch.sql.ast.tree.Project;
import com.amazon.opendistroforelasticsearch.sql.ast.tree.Relation;
import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan;
Expand Down Expand Up @@ -90,7 +92,16 @@ public UnresolvedPlan visitSelectClause(SelectClauseContext ctx) {

@Override
public UnresolvedPlan visitFromClause(FromClauseContext ctx) {
return new Relation(visitAstExpression(ctx.tableName().qualifiedName()));
UnresolvedPlan result = new Relation(visitAstExpression(ctx.tableName().qualifiedName()));
if (ctx.whereClause() != null) {
result = visit(ctx.whereClause()).attach(result);
}
return result;
}

@Override
public UnresolvedPlan visitWhereClause(WhereClauseContext ctx) {
return new Filter(visitAstExpression(ctx.expression()));
}

@Override
Expand Down
Loading