From 40c500cad05a5987bebeb2257ad9549f0e8097c6 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Wed, 22 Sep 2021 22:05:14 +0800 Subject: [PATCH] Forbid using string-related relational expressions as the filter of LOOKUP except STARTS/NOT STARTS WITH --- src/graph/validator/LookupValidator.cpp | 51 +++--- .../features/lookup/EdgeIndexFullScan.feature | 106 +----------- .../features/lookup/TagIndexFullScan.feature | 160 +----------------- 3 files changed, 46 insertions(+), 271 deletions(-) diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index 7894fd51604..930557b1f45 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -19,6 +19,8 @@ using nebula::meta::NebulaSchemaProvider; using std::shared_ptr; using std::unique_ptr; +using ExprKind = nebula::Expression::Kind; + namespace nebula { namespace graph { @@ -74,7 +76,7 @@ Status LookupValidator::prepareYield() { auto from = sentence()->from(); for (auto col : yieldClause->columns()) { - if (col->expr()->kind() != Expression::Kind::kLabelAttribute) { + if (col->expr()->kind() != ExprKind::kLabelAttribute) { // TODO(yee): support more exprs, such as (player.age + 1) AS age return Status::SemanticError("Yield clauses are not supported: `%s'", col->toString().c_str()); @@ -145,16 +147,25 @@ StatusOr LookupValidator::handleLogicalExprOperands(LogicalExpressi StatusOr LookupValidator::checkFilter(Expression* expr) { // TODO: Support IN expression push down if (expr->isRelExpr()) { + // Only starts with can be pushed down as a range scan, so forbid other string-related relExpr + if (expr->kind() == ExprKind::kRelREG || expr->kind() == ExprKind::kContains || + expr->kind() == ExprKind::kNotContains || expr->kind() == ExprKind::kEndsWith || + expr->kind() == ExprKind::kNotEndsWith) { + return Status::SemanticError( + "Expression %s is not supported, please use full-text index as an optimal solution", + expr->toString().c_str()); + } + auto relExpr = static_cast(expr); NG_RETURN_IF_ERROR(checkRelExpr(relExpr)); return rewriteRelExpr(relExpr); } switch (expr->kind()) { - case Expression::Kind::kLogicalOr: { + case ExprKind::kLogicalOr: { ExpressionUtils::pullOrs(expr); return handleLogicalExprOperands(static_cast(expr)); } - case Expression::Kind::kLogicalAnd: { + case ExprKind::kLogicalAnd: { ExpressionUtils::pullAnds(expr); return handleLogicalExprOperands(static_cast(expr)); } @@ -168,12 +179,10 @@ Status LookupValidator::checkRelExpr(RelationalExpression* expr) { auto* left = expr->left(); auto* right = expr->right(); // Does not support filter : schema.col1 > schema.col2 - if (left->kind() == Expression::Kind::kLabelAttribute && - right->kind() == Expression::Kind::kLabelAttribute) { + if (left->kind() == ExprKind::kLabelAttribute && right->kind() == ExprKind::kLabelAttribute) { return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); } - if (left->kind() == Expression::Kind::kLabelAttribute || - right->kind() == Expression::Kind::kLabelAttribute) { + if (left->kind() == ExprKind::kLabelAttribute || right->kind() == ExprKind::kLabelAttribute) { return Status::OK(); } return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); @@ -183,7 +192,7 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr // swap LHS and RHS of relExpr if LabelAttributeExpr in on the right, // so that LabelAttributeExpr is always on the left auto rightOperand = expr->right(); - if (rightOperand->kind() == Expression::Kind::kLabelAttribute) { + if (rightOperand->kind() == ExprKind::kLabelAttribute) { expr = static_cast(reverseRelKind(expr)); } @@ -197,7 +206,7 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr auto foldRes = ExpressionUtils::foldConstantExpr(expr); NG_RETURN_IF_ERROR(foldRes); expr = static_cast(foldRes.value()); - DCHECK_EQ(expr->left()->kind(), Expression::Kind::kLabelAttribute); + DCHECK_EQ(expr->left()->kind(), ExprKind::kLabelAttribute); // Check schema and value type std::string prop = la->right()->value().getStr(); @@ -215,7 +224,7 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr StatusOr LookupValidator::checkConstExpr(Expression* expr, const std::string& prop, - const Expression::Kind kind) { + const ExprKind kind) { auto* pool = expr->getObjPool(); if (!evaluableExpr(expr)) { return Status::SemanticError("'%s' is not an evaluable expression.", expr->toString().c_str()); @@ -240,7 +249,7 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, double f = v.getFloat(); int iCeil = ceil(f); int iFloor = floor(f); - if (kind == Expression::Kind::kRelGE || kind == Expression::Kind::kRelLT) { + if (kind == ExprKind::kRelGE || kind == ExprKind::kRelLT) { // edge case col1 >= 40.0, no need to round up if (std::abs(f - iCeil) < kEpsilon) { return ConstantExpression::make(pool, iFloor); @@ -287,21 +296,21 @@ Expression* LookupValidator::reverseRelKind(RelationalExpression* expr) { auto reversedKind = kind; switch (kind) { - case Expression::Kind::kRelEQ: + case ExprKind::kRelEQ: break; - case Expression::Kind::kRelNE: + case ExprKind::kRelNE: break; - case Expression::Kind::kRelLT: - reversedKind = Expression::Kind::kRelGT; + case ExprKind::kRelLT: + reversedKind = ExprKind::kRelGT; break; - case Expression::Kind::kRelLE: - reversedKind = Expression::Kind::kRelGE; + case ExprKind::kRelLE: + reversedKind = ExprKind::kRelGE; break; - case Expression::Kind::kRelGT: - reversedKind = Expression::Kind::kRelLT; + case ExprKind::kRelGT: + reversedKind = ExprKind::kRelLT; break; - case Expression::Kind::kRelGE: - reversedKind = Expression::Kind::kRelLE; + case ExprKind::kRelGE: + reversedKind = ExprKind::kRelLE; break; default: LOG(FATAL) << "Invalid relational expression kind: " << static_cast(kind); diff --git a/tests/tck/features/lookup/EdgeIndexFullScan.feature b/tests/tck/features/lookup/EdgeIndexFullScan.feature index bf89a32e5c2..a2e6b49ea35 100644 --- a/tests/tck/features/lookup/EdgeIndexFullScan.feature +++ b/tests/tck/features/lookup/EdgeIndexFullScan.feature @@ -33,51 +33,12 @@ Feature: Lookup edge index full scan '103'->'101':('Blue', 33); """ - Scenario: Edge with relational RegExp filter[1] + Scenario: Edge with relational RegExp filter When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+\\d+" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - When executing query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+ll\\w+" YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "102" | "103" | 0 | "Yellow" | - - # skip because `make fmt` will delete '\' in the operator info and causes tests fail - @skip - Scenario: Edge with relational RegExp filter[2] - When profiling query: - """ - LOOKUP ON edge_1 where edge_1.col1_str =~ "\\d+\\w+" YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+\d+\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | - When profiling query: - """ - LOOKUP ON edge_1 where edge_1.col1_str =~ "\\w+ea\\w+" YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+ea\w+\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str=~"\w+\d+") is not supported, please use full-text index as an optimal solution Scenario: Edge with relational NE filter When profiling query: @@ -250,39 +211,16 @@ Feature: Lookup edge index full scan | 0 | Start | | | Scenario: Edge with relational CONTAINS/NOT CONTAINS filter - When profiling query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS toLower("L") YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "103" | "101" | 0 | "Blue" | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str CONTAINS \"l\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS "ABC" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - When profiling query: + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str CONTAINS "ABC") is not supported, please use full-text index as an optimal solution + When executing query: """ - LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS toLower("L") YIELD edge_1.col1_str + LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS "ABC" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT CONTAINS \"l\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution Scenario: Edge with relational STARTS/NOT STARTS WITH filter When profiling query: @@ -325,41 +263,13 @@ Feature: Lookup edge index full scan | 0 | Start | | | Scenario: Edge with relational ENDS/NOT ENDS WITH filter - When profiling query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH toLower("E") YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "103" | "101" | 0 | "Blue" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str ENDS WITH \"e\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH "ABC" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str ENDS WITH "ABC") is not supported, please use full-text index as an optimal solution When executing query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH 123 YIELD edge_1.col1_str - """ - Then a SemanticError should be raised at runtime: Column type error : col1_str - When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str NOT ENDS WITH toLower("E") YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT ENDS WITH \"e\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT ENDS WITH toLower("E")) is not supported, please use full-text index as an optimal solution diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index ecbbbd30817..e3b76329de3 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -8,46 +8,7 @@ Feature: Lookup tag index full scan """ LOOKUP ON team where team.name =~ "\\d+\\w+" """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - When executing query: - """ - LOOKUP ON team where team.name =~ "\\w+ea\\w+" - """ - Then the result should be, in any order: - | VertexID | - | "Heat" | - - # skip because `make fmt` will delete '\' in the operator info and causes tests fail - @skip - Scenario: Tag with relational RegExp filter[2] - When profiling query: - """ - LOOKUP ON team where team.name =~ "\\d+\\w+" - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name=~\"\d+\w+\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | - When profiling query: - """ - LOOKUP ON team where team.name =~ "\\w+ea\\w+" - """ - Then the result should be, in any order: - | VertexID | - | "Heat" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name=~\"\w+ea\w+\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name=~"\d+\w+") is not supported, please use full-text index as an optimal solution Scenario: Tag with relational NE filter When profiling query: @@ -292,66 +253,16 @@ Feature: Lookup tag index full scan | 0 | Start | | | Scenario: Tag with relational CONTAINS/NOT CONTAINS filter - When profiling query: - """ - LOOKUP ON team WHERE team.name CONTAINS toLower("ER") - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Trail Blazers" | - | "Timberwolves" | - | "Cavaliers" | - | "Thunders" | - | "Clippers" | - | "Pacers" | - | "Mavericks" | - | "Lakers" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name CONTAINS \"er\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ LOOKUP ON team WHERE team.name CONTAINS "ABC" """ - Then the result should be, in any order: - | VertexID | - When profiling query: + Then a SemanticError should be raised at runtime: Expression (team.name CONTAINS "ABC") is not supported, please use full-text index as an optimal solution + When executing query: """ - LOOKUP ON team WHERE team.name NOT CONTAINS toLower("ER") + LOOKUP ON team WHERE team.name NOT CONTAINS "ABC" """ - Then the result should be, in any order: - | VertexID | - | "Wizards" | - | "Bucks" | - | "Bulls" | - | "Warriors" | - | "Celtics" | - | "Suns" | - | "Grizzlies" | - | "Hawks" | - | "Heat" | - | "Hornets" | - | "Jazz" | - | "Kings" | - | "Knicks" | - | "Spurs" | - | "Magic" | - | "Rockets" | - | "Nets" | - | "Nuggets" | - | "Raptors" | - | "Pelicans" | - | "Pistons" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT CONTAINS \"er\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution Scenario: Tag with relational STARTS/NOT STARTS WITH filter When profiling query: @@ -421,68 +332,13 @@ Feature: Lookup tag index full scan | 0 | Start | | | Scenario: Tag with relational ENDS/NOT ENDS WITH filter - When profiling query: - """ - LOOKUP ON team WHERE team.name ENDS WITH toLower("S") - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Bucks" | - | "Bulls" | - | "Cavaliers" | - | "Celtics" | - | "Clippers" | - | "Grizzlies" | - | "Hawks" | - | "Wizards" | - | "Hornets" | - | "Warriors" | - | "Kings" | - | "Knicks" | - | "Lakers" | - | "Trail Blazers" | - | "Mavericks" | - | "Nets" | - | "Nuggets" | - | "Pacers" | - | "Pelicans" | - | "Pistons" | - | "Raptors" | - | "Rockets" | - | "Spurs" | - | "Suns" | - | "Thunders" | - | "Timberwolves" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name ENDS WITH \"s\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ - LOOKUP ON team WHERE team.name ENDS WITH "ABC" + LOOKUP ON team WHERE team.name ENDS WITH toLower("S") """ - Then the result should be, in any order: - | VertexID | + Then a SemanticError should be raised at runtime: Expression (team.name ENDS WITH toLower("S")) is not supported, please use full-text index as an optimal solution When executing query: - """ - LOOKUP ON team WHERE team.name ENDS WITH 123 - """ - Then a SemanticError should be raised at runtime: Column type error : name - When profiling query: """ LOOKUP ON team WHERE team.name NOT ENDS WITH toLower("S") """ - Then the result should be, in any order: - | VertexID | - | "Magic" | - | "Jazz" | - | "Heat" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT ENDS WITH \"s\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name NOT ENDS WITH toLower("S")) is not supported, please use full-text index as an optimal solution