Skip to content

Commit

Permalink
Forbid using string-related relational expressions as the filter of L…
Browse files Browse the repository at this point in the history
…OOKUP except STARTS/NOT STARTS WITH
  • Loading branch information
Aiee committed Sep 23, 2021
1 parent 1dbfc5f commit 40c500c
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 271 deletions.
51 changes: 30 additions & 21 deletions src/graph/validator/LookupValidator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ using nebula::meta::NebulaSchemaProvider;
using std::shared_ptr;
using std::unique_ptr;

using ExprKind = nebula::Expression::Kind;

namespace nebula {
namespace graph {

Expand Down Expand Up @@ -74,7 +76,7 @@ Status LookupValidator::prepareYield() {

auto from = sentence()->from();
for (auto col : yieldClause->columns()) {
if (col->expr()->kind() != Expression::Kind::kLabelAttribute) {
if (col->expr()->kind() != ExprKind::kLabelAttribute) {
// TODO(yee): support more exprs, such as (player.age + 1) AS age
return Status::SemanticError("Yield clauses are not supported: `%s'",
col->toString().c_str());
Expand Down Expand Up @@ -145,16 +147,25 @@ StatusOr<Expression*> LookupValidator::handleLogicalExprOperands(LogicalExpressi
StatusOr<Expression*> LookupValidator::checkFilter(Expression* expr) {
// TODO: Support IN expression push down
if (expr->isRelExpr()) {
// Only starts with can be pushed down as a range scan, so forbid other string-related relExpr
if (expr->kind() == ExprKind::kRelREG || expr->kind() == ExprKind::kContains ||
expr->kind() == ExprKind::kNotContains || expr->kind() == ExprKind::kEndsWith ||
expr->kind() == ExprKind::kNotEndsWith) {
return Status::SemanticError(
"Expression %s is not supported, please use full-text index as an optimal solution",
expr->toString().c_str());
}

auto relExpr = static_cast<RelationalExpression*>(expr);
NG_RETURN_IF_ERROR(checkRelExpr(relExpr));
return rewriteRelExpr(relExpr);
}
switch (expr->kind()) {
case Expression::Kind::kLogicalOr: {
case ExprKind::kLogicalOr: {
ExpressionUtils::pullOrs(expr);
return handleLogicalExprOperands(static_cast<LogicalExpression*>(expr));
}
case Expression::Kind::kLogicalAnd: {
case ExprKind::kLogicalAnd: {
ExpressionUtils::pullAnds(expr);
return handleLogicalExprOperands(static_cast<LogicalExpression*>(expr));
}
Expand All @@ -168,12 +179,10 @@ Status LookupValidator::checkRelExpr(RelationalExpression* expr) {
auto* left = expr->left();
auto* right = expr->right();
// Does not support filter : schema.col1 > schema.col2
if (left->kind() == Expression::Kind::kLabelAttribute &&
right->kind() == Expression::Kind::kLabelAttribute) {
if (left->kind() == ExprKind::kLabelAttribute && right->kind() == ExprKind::kLabelAttribute) {
return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str());
}
if (left->kind() == Expression::Kind::kLabelAttribute ||
right->kind() == Expression::Kind::kLabelAttribute) {
if (left->kind() == ExprKind::kLabelAttribute || right->kind() == ExprKind::kLabelAttribute) {
return Status::OK();
}
return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str());
Expand All @@ -183,7 +192,7 @@ StatusOr<Expression*> LookupValidator::rewriteRelExpr(RelationalExpression* expr
// swap LHS and RHS of relExpr if LabelAttributeExpr in on the right,
// so that LabelAttributeExpr is always on the left
auto rightOperand = expr->right();
if (rightOperand->kind() == Expression::Kind::kLabelAttribute) {
if (rightOperand->kind() == ExprKind::kLabelAttribute) {
expr = static_cast<RelationalExpression*>(reverseRelKind(expr));
}

Expand All @@ -197,7 +206,7 @@ StatusOr<Expression*> LookupValidator::rewriteRelExpr(RelationalExpression* expr
auto foldRes = ExpressionUtils::foldConstantExpr(expr);
NG_RETURN_IF_ERROR(foldRes);
expr = static_cast<RelationalExpression*>(foldRes.value());
DCHECK_EQ(expr->left()->kind(), Expression::Kind::kLabelAttribute);
DCHECK_EQ(expr->left()->kind(), ExprKind::kLabelAttribute);

// Check schema and value type
std::string prop = la->right()->value().getStr();
Expand All @@ -215,7 +224,7 @@ StatusOr<Expression*> LookupValidator::rewriteRelExpr(RelationalExpression* expr

StatusOr<Expression*> LookupValidator::checkConstExpr(Expression* expr,
const std::string& prop,
const Expression::Kind kind) {
const ExprKind kind) {
auto* pool = expr->getObjPool();
if (!evaluableExpr(expr)) {
return Status::SemanticError("'%s' is not an evaluable expression.", expr->toString().c_str());
Expand All @@ -240,7 +249,7 @@ StatusOr<Expression*> LookupValidator::checkConstExpr(Expression* expr,
double f = v.getFloat();
int iCeil = ceil(f);
int iFloor = floor(f);
if (kind == Expression::Kind::kRelGE || kind == Expression::Kind::kRelLT) {
if (kind == ExprKind::kRelGE || kind == ExprKind::kRelLT) {
// edge case col1 >= 40.0, no need to round up
if (std::abs(f - iCeil) < kEpsilon) {
return ConstantExpression::make(pool, iFloor);
Expand Down Expand Up @@ -287,21 +296,21 @@ Expression* LookupValidator::reverseRelKind(RelationalExpression* expr) {
auto reversedKind = kind;

switch (kind) {
case Expression::Kind::kRelEQ:
case ExprKind::kRelEQ:
break;
case Expression::Kind::kRelNE:
case ExprKind::kRelNE:
break;
case Expression::Kind::kRelLT:
reversedKind = Expression::Kind::kRelGT;
case ExprKind::kRelLT:
reversedKind = ExprKind::kRelGT;
break;
case Expression::Kind::kRelLE:
reversedKind = Expression::Kind::kRelGE;
case ExprKind::kRelLE:
reversedKind = ExprKind::kRelGE;
break;
case Expression::Kind::kRelGT:
reversedKind = Expression::Kind::kRelLT;
case ExprKind::kRelGT:
reversedKind = ExprKind::kRelLT;
break;
case Expression::Kind::kRelGE:
reversedKind = Expression::Kind::kRelLE;
case ExprKind::kRelGE:
reversedKind = ExprKind::kRelLE;
break;
default:
LOG(FATAL) << "Invalid relational expression kind: " << static_cast<uint8_t>(kind);
Expand Down
106 changes: 8 additions & 98 deletions tests/tck/features/lookup/EdgeIndexFullScan.feature
Original file line number Diff line number Diff line change
Expand Up @@ -33,51 +33,12 @@ Feature: Lookup edge index full scan
'103'->'101':('Blue', 33);
"""

Scenario: Edge with relational RegExp filter[1]
Scenario: Edge with relational RegExp filter
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+\\d+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+ll\\w+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "102" | "103" | 0 | "Yellow" |

# skip because `make fmt` will delete '\' in the operator info and causes tests fail
@skip
Scenario: Edge with relational RegExp filter[2]
When profiling query:
"""
LOOKUP ON edge_1 where edge_1.col1_str =~ "\\d+\\w+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+\d+\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When profiling query:
"""
LOOKUP ON edge_1 where edge_1.col1_str =~ "\\w+ea\\w+" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+ea\w+\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str=~"\w+\d+") is not supported, please use full-text index as an optimal solution

Scenario: Edge with relational NE filter
When profiling query:
Expand Down Expand Up @@ -250,39 +211,16 @@ Feature: Lookup edge index full scan
| 0 | Start | | |

Scenario: Edge with relational CONTAINS/NOT CONTAINS filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS toLower("L") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "103" | "101" | 0 | "Blue" |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str CONTAINS \"l\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS "ABC" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
When profiling query:
Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str CONTAINS "ABC") is not supported, please use full-text index as an optimal solution
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS toLower("L") YIELD edge_1.col1_str
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS "ABC" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT CONTAINS \"l\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution

Scenario: Edge with relational STARTS/NOT STARTS WITH filter
When profiling query:
Expand Down Expand Up @@ -325,41 +263,13 @@ Feature: Lookup edge index full scan
| 0 | Start | | |

Scenario: Edge with relational ENDS/NOT ENDS WITH filter
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH toLower("E") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "103" | "101" | 0 | "Blue" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str ENDS WITH \"e\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH "ABC" YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str ENDS WITH "ABC") is not supported, please use full-text index as an optimal solution
When executing query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH 123 YIELD edge_1.col1_str
"""
Then a SemanticError should be raised at runtime: Column type error : col1_str
When profiling query:
"""
LOOKUP ON edge_1 WHERE edge_1.col1_str NOT ENDS WITH toLower("E") YIELD edge_1.col1_str
"""
Then the result should be, in any order:
| SrcVID | DstVID | Ranking | edge_1.col1_str |
| "101" | "102" | 0 | "Red1" |
| "102" | "103" | 0 | "Yellow" |
And the execution plan should be:
| id | name | dependencies | operator info |
| 3 | Project | 2 | |
| 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT ENDS WITH \"e\")"} |
| 4 | EdgeIndexFullScan | 0 | |
| 0 | Start | | |
Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT ENDS WITH toLower("E")) is not supported, please use full-text index as an optimal solution
Loading

0 comments on commit 40c500c

Please sign in to comment.