Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

Add support in using relational expressions as filters in LOOKUP clause #1251

Merged
merged 5 commits into from
Jul 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions src/validator/LookupValidator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ StatusOr<Expression*> LookupValidator::handleLogicalExprOperands(LogicalExpressi
}

StatusOr<Expression*> LookupValidator::checkFilter(Expression* expr) {
// TODO: Support IN expression push down
if (expr->isRelExpr()) {
return checkRelExpr(static_cast<RelationalExpression*>(expr));
}
switch (expr->kind()) {
case Expression::Kind::kLogicalOr: {
ExpressionUtils::pullOrs(expr);
Expand All @@ -157,14 +161,6 @@ StatusOr<Expression*> LookupValidator::checkFilter(Expression* expr) {
ExpressionUtils::pullAnds(expr);
return handleLogicalExprOperands(static_cast<LogicalExpression*>(expr));
}
case Expression::Kind::kRelLE:
case Expression::Kind::kRelGE:
case Expression::Kind::kRelEQ:
case Expression::Kind::kRelLT:
case Expression::Kind::kRelGT:
case Expression::Kind::kRelNE: {
return checkRelExpr(static_cast<RelationalExpression*>(expr));
}
default: {
return Status::SemanticError("Expression %s not supported yet",
expr->toString().c_str());
Expand Down Expand Up @@ -261,7 +257,10 @@ StatusOr<Value> LookupValidator::checkConstExpr(Expression* expr,
}

if (v.type() != SchemaUtil::propTypeToValueType(type)) {
return Status::SemanticError("Column type error : %s", prop.c_str());
// allow different types in the IN expression, such as "abc" IN ["abc"]
if (v.type() != Value::Type::LIST) {
return Status::SemanticError("Column type error : %s", prop.c_str());
}
}
return v;
}
Expand Down
10 changes: 0 additions & 10 deletions tests/tck/features/lookup/ByIndex.feature
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,6 @@ Feature: Lookup by index itself
LOOKUP ON team WHERE 1 + 1 == 2 YIELD team.name AS Name
"""
Then a SemanticError should be raised at runtime:
When executing query:
"""
LOOKUP ON team WHERE team.name CONTAINS 'Jazz'
"""
Then a SemanticError should be raised at runtime:
When executing query:
"""
LOOKUP ON team WHERE team.name CONTAINS 'Jazz' YIELD team.name AS Name
"""
Then a SemanticError should be raised at runtime:
When executing query:
"""
LOOKUP ON player WHERE player.age > 9223372036854775807+1
Expand Down
10 changes: 0 additions & 10 deletions tests/tck/features/lookup/ByIndex.intVid.feature
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,6 @@ Feature: Lookup by index itself in integer vid
LOOKUP ON team WHERE 1 + 1 == 2 YIELD team.name AS Name
"""
Then a SemanticError should be raised at runtime:
When executing query:
"""
LOOKUP ON team WHERE team.name CONTAINS 'Jazz'
"""
Then a SemanticError should be raised at runtime:
When executing query:
"""
LOOKUP ON team WHERE team.name CONTAINS 'Jazz' YIELD team.name AS Name
"""
Then a SemanticError should be raised at runtime:
When executing query:
"""
LOOKUP ON player WHERE player.age > 9223372036854775807+1
Expand Down
289 changes: 289 additions & 0 deletions tests/tck/features/lookup/EdgeIndexFullScan.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
Feature: Lookup edge index full scan

Background:
Given an empty graph
And create a space with following options:
| partition_num | 9 |
| replica_factor | 1 |
| vid_type | FIXED_STRING(15) |
| charset | utf8 |
| collate | utf8_bin |
And having executed:
"""
CREATE EDGE edge_1(col1_str string, col2_int int);
"""
And wait 3 seconds
# index on col1_str
And having executed:
"""
CREATE EDGE INDEX col1_str_index ON edge_1(col1_str(10))
"""
# index on col2_int
And having executed:
"""
CREATE EDGE INDEX col2_int_index ON edge_1(col2_int)
"""
And wait 3 seconds
And having executed:
"""
INSERT EDGE
edge_1(col1_str, col2_int)
VALUES
'101'->'102':('Red1', 11),
'102'->'103':('Yellow', 22),
'103'->'101':('Blue', 33);
"""
And wait 3 seconds

Scenario: Edge with relational RegExp filter[1]
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+\\d+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+ll\\w+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "102" | "103" | 0 | "Yellow" |

# Skipped: `make fmt` deletes the '\' characters in the operator info column,
# which causes the execution-plan comparison to fail.
# The regular expressions mirror the "RegExp filter[1]" scenario so that each
# query agrees with its expected rows and with the expected Filter condition:
# `\w+\d+` matches "Red1" and `\w+ll\w+` matches "Yellow".
# NOTE(review): confirm the exact backslash escaping of the operator info when un-skipping.
@skip
Scenario: Edge with relational RegExp filter[2]
When profiling query:
"""
LOOKUP ON edge_1 where edge_1.col1_str =~ "\\w+\\d+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+\d+\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When profiling query:
"""
LOOKUP ON edge_1 where edge_1.col1_str =~ "\\w+ll\\w+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+ll\w+\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |

Scenario: Edge with relational NE filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str != "Yellow" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
| "103" | "101" | 0 | "Blue" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str!=\"Yellow\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col2_int != 11 YIELD edge_1.col2_int
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col2_int |
| "103" | "101" | 0 | 33 |
| "102" | "103" | 0 | 22 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col2_int!=11)"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |

Scenario: Edge with relational IN/NOT IN filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str IN ["Red", "Yellow"] YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str IN [\"Red\",\"Yellow\"])"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str IN ["non-existed-name"] YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col2_int IN [23 - 1 , 66/2] YIELD edge_1.col2_int
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col2_int |
| "103" | "101" | 0 | 33 |
| "102" | "103" | 0 | 22 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col2_int IN [22,33])"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT IN ["Blue"] YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT IN [\"Blue\"])"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col2_int NOT IN [23 - 1 , 66/2] YIELD edge_1.col2_int
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col2_int |
| "101" | "102" | 0 | 11 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col2_int NOT IN [22,33])"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |

Scenario: Edge with relational CONTAINS/NOT CONTAINS filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS toLower("L") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "103" | "101" | 0 | "Blue" |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str CONTAINS \"l\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS "ABC" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS toLower("L") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT CONTAINS \"l\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |

Scenario: Edge with relational STARTS/NOT STARTS WITH filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str STARTS WITH toUpper("r") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str STARTS WITH \"R\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str STARTS WITH "ABC" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str STARTS WITH 123 YIELD edge_1.col1_str
"""
Then a SemanticError should be raised at runtime: Column type error : col1_str
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT STARTS WITH toUpper("r") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "103" | "101" | 0 | "Blue" |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT STARTS WITH \"R\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |

Scenario: Edge with relational ENDS/NOT ENDS WITH filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH toLower("E") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "103" | "101" | 0 | "Blue" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str ENDS WITH \"e\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH "ABC" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH 123 YIELD edge_1.col1_str
"""
Then a SemanticError should be raised at runtime: Column type error : col1_str
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT ENDS WITH toLower("E") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT ENDS WITH \"e\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
Loading