From b296f9af737382b61352e9d34f409c8f51d70fce Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Thu, 23 Sep 2021 23:15:21 +0800 Subject: [PATCH] Address comments --- src/graph/validator/LookupValidator.cpp | 2 +- .../features/lookup/EdgeIndexFullScan.feature | 98 ++++++++++-- .../features/lookup/TagIndexFullScan.feature | 144 +++++++++++++----- 3 files changed, 186 insertions(+), 58 deletions(-) diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index 930557b1f45..97df8d7a0bd 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -150,7 +150,7 @@ StatusOr LookupValidator::checkFilter(Expression* expr) { // Only starts with can be pushed down as a range scan, so forbid other string-related relExpr if (expr->kind() == ExprKind::kRelREG || expr->kind() == ExprKind::kContains || expr->kind() == ExprKind::kNotContains || expr->kind() == ExprKind::kEndsWith || - expr->kind() == ExprKind::kNotEndsWith) { + expr->kind() == ExprKind::kNotStartsWith || expr->kind() == ExprKind::kNotEndsWith) { return Status::SemanticError( "Expression %s is not supported, please use full-text index as an optimal solution", expr->toString().c_str()); diff --git a/tests/tck/features/lookup/EdgeIndexFullScan.feature b/tests/tck/features/lookup/EdgeIndexFullScan.feature index a2e6b49ea35..d07348979c3 100644 --- a/tests/tck/features/lookup/EdgeIndexFullScan.feature +++ b/tests/tck/features/lookup/EdgeIndexFullScan.feature @@ -70,7 +70,7 @@ Feature: Lookup edge index full scan | 4 | EdgeIndexFullScan | 0 | | | 0 | Start | | | - Scenario: Edge with relational IN/NOT IN filter + Scenario: Edge with simple relational IN filter When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str IN ["Red", "Yellow"] YIELD edge_1.col1_str @@ -151,12 +151,30 @@ Feature: Lookup edge index full scan | 3 | Project | 4 | | | 4 | EdgeIndexPrefixScan | 0 | | | 0 | Start | | | - # a IN b AND 
c IN d + + Scenario: Edge with complex relational IN filter + # (a IN b) AND (c IN d) # List has only 1 element, so prefixScan is applied When profiling query: """ LOOKUP ON edge_1 - WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col1_str IN [toUpper("r")+"ed1"] + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # (a IN b) AND (c IN d) + # a, c both have indexes (4 prefixScan will be executed) + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1", "ABC"] YIELD edge_1.col1_str, edge_1.col2_int """ Then the result should be, in any order: @@ -167,11 +185,23 @@ Feature: Lookup edge index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b AND c IN d (4 prefixScan will be executed) + # (a IN b) AND (c IN d) + # a, c have a composite index + When executing query: + """ + CREATE EDGE INDEX composite_edge_index ON edge_1(col1_str(20), col2_int); + """ + Then the execution should be successful + And wait 6 seconds + When submit a job: + """ + REBUILD EDGE INDEX composite_edge_index + """ + Then wait the job to finish When profiling query: """ LOOKUP ON edge_1 - WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col1_str IN [toUpper("r")+"ed1", "ABC"] + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1", "ABC"] YIELD edge_1.col1_str, edge_1.col2_int """ Then the result should be, in any order: @@ -182,6 +212,51 @@ Feature: Lookup edge index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | + # (a IN b) AND (c IN d) while only a has index + # first drop edge index + When executing 
query: + """ + DROP EDGE INDEX composite_edge_index + """ + Then the execution should be successful + When executing query: + """ + DROP EDGE INDEX col1_str_index + """ + Then the execution should be successful + And wait 6 seconds + # since the edge index has been dropped, here an EdgeIndexFullScan should be performed + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col1_str IN ["Red1", "ABC"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | | + | 4 | EdgeIndexFullScan | 0 | | + | 0 | Start | | | + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1", "ABC"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + + Scenario: Edge with relational NOT IN filter When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str NOT IN ["Blue"] YIELD edge_1.col1_str @@ -249,18 +324,9 @@ Feature: Lookup edge index full scan Then a SemanticError should be raised at runtime: Column type error : col1_str When profiling query: """ - LOOKUP ON edge_1 WHERE edge_1.col1_str NOT STARTS WITH toUpper("r") YIELD edge_1.col1_str + LOOKUP ON edge_1 WHERE edge_1.col1_str NOT STARTS WITH "R" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "103" | "101" | 0 | "Blue" | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator 
info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT STARTS WITH \"R\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT STARTS WITH "R") is not supported, please use full-text index as an optimal solution Scenario: Edge with relational ENDS/NOT ENDS WITH filter When executing query: diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index e3b76329de3..85e189d9b0c 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -54,7 +54,7 @@ Feature: Lookup tag index full scan | 0 | Start | | | # TODO: Support compare operator info that has multiple column hints - Scenario: Tag with relational IN/NOT IN filter + Scenario: Tag with simple relational IN filter When profiling query: """ LOOKUP ON team WHERE team.name IN ["Hornets", "Jazz"] @@ -89,7 +89,7 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b OR c + # (a IN b) OR c When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] OR player.name == "ABC" YIELD player.age @@ -105,7 +105,7 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b OR c IN d + # (a IN b) OR (c IN d) When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] OR player.name IN ["Kobe Bryant"] YIELD player.age @@ -121,7 +121,7 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b AND c + # (a IN b) AND c When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] AND player.name == "Kobe Bryant" YIELD player.age @@ -134,7 +134,37 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b AND c IN d + When profiling query: 
+ """ + LOOKUP ON player WHERE player.name IN ["Kobe Bryant", "Tim Duncan"] AND player.age > 30 + """ + Then the result should be, in any order: + | VertexID | + | "Kobe Bryant" | + | "Tim Duncan" | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # c AND (a IN b) + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name == "Kobe Bryant" YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + + Scenario: Tag with complex relational IN filter + Given an empty graph + And load "nba" csv data to a new space + # (a IN b) AND (c IN d) while a, c both have indexes When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age @@ -147,6 +177,72 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | + # (a IN b) AND (c IN d) while a, c have a composite index + When executing query: + """ + CREATE TAG INDEX composite_player_name_age_index ON player(name(64), age); + """ + Then the execution should be successful + And wait 6 seconds + When submit a job: + """ + REBUILD TAG INDEX composite_player_name_age_index + """ + Then wait the job to finish + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # (a IN b) AND (c IN d) while only a has index + # first drop tag index + 
When executing query: + """ + DROP TAG INDEX composite_player_name_age_index + """ + Then the execution should be successful + When executing query: + """ + DROP TAG INDEX player_name_index + """ + Then the execution should be successful + And wait 6 seconds + # since the tag index has been dropped, here a TagIndexFullScan should be performed + When profiling query: + """ + LOOKUP ON player WHERE player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | | + | 4 | TagIndexFullScan | 0 | | + | 0 | Start | | | + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + Then drop the used space + + Scenario: Tag with relational NOT IN filter When profiling query: """ LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] @@ -264,7 +360,7 @@ Feature: Lookup tag index full scan """ Then a SemanticError should be raised at runtime: Expression (team.name NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution - Scenario: Tag with relational STARTS/NOT STARTS WITH filter + Scenario: Tag with relational STARTS WITH filter When profiling query: """ LOOKUP ON team WHERE team.name STARTS WITH toUpper("t") @@ -295,41 +391,7 @@ Feature: Lookup tag index full scan """ LOOKUP ON team WHERE team.name NOT STARTS WITH toUpper("t") """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Bucks" | - | "Bulls" | - | "Cavaliers" | - | "Celtics" | - | "Clippers" | - | "Grizzlies" | 
- | "Hawks" | - | "Heat" | - | "Hornets" | - | "Jazz" | - | "Kings" | - | "Knicks" | - | "Lakers" | - | "Magic" | - | "Mavericks" | - | "Nets" | - | "Nuggets" | - | "Pacers" | - | "Pelicans" | - | "Pistons" | - | "Raptors" | - | "Rockets" | - | "Spurs" | - | "Suns" | - | "Wizards" | - | "Warriors" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT STARTS WITH \"T\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name NOT STARTS WITH toUpper("t")) is not supported, please use full-text index as an optimal solution Scenario: Tag with relational ENDS/NOT ENDS WITH filter When executing query: