From 428b220fd2bb38e61ab706cb79e6ad04deb4dac3 Mon Sep 17 00:00:00 2001 From: nebula-bots <88429921+nebula-bots@users.noreply.github.com> Date: Wed, 7 Dec 2022 12:59:34 +0800 Subject: [PATCH] optimize left join predicate and eliminate useless appendVertices (#1848) * optimize left join predicate & Eliminate useless appendVertices * fix * update * address leftExprs&rightExprs * address comments Co-authored-by: jie.wang <38901892+jievince@users.noreply.github.com> --- src/common/function/FunctionManager.cpp | 5 + src/graph/optimizer/CMakeLists.txt | 1 + .../rule/OptimizeLeftJoinPredicateRule.cpp | 167 ++++++++++++++++++ .../rule/OptimizeLeftJoinPredicateRule.h | 46 +++++ .../features/match/MultiQueryParts.feature | 23 ++- .../OptimizeLeftJoinPredicateRule.feature | 64 +++++++ .../PushFilterDownTraverseRule.feature | 3 +- 7 files changed, 295 insertions(+), 14 deletions(-) create mode 100644 src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp create mode 100644 src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h create mode 100644 tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature diff --git a/src/common/function/FunctionManager.cpp b/src/common/function/FunctionManager.cpp index 924c5c5e654..bca4d67d832 100644 --- a/src/common/function/FunctionManager.cpp +++ b/src/common/function/FunctionManager.cpp @@ -2007,6 +2007,11 @@ FunctionManager::FunctionManager() { }; } { + // `none_direct_dst` always returns the dstId of an edge key + // without considering the direction of the edge type. 
+ // The encoding of the edge key is: + // type(1) + partId(3) + srcId(*) + edgeType(4) + edgeRank(8) + dstId(*) + placeHolder(1) + // More information about the encoding can be found in `NebulaKeyUtils.h` auto &attr = functions_["none_direct_dst"]; attr.minArity_ = 1; attr.maxArity_ = 1; diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index 2a1d5279a44..3e0d4df2f2b 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -58,6 +58,7 @@ nebula_add_library( rule/PushLimitDownScanEdgesRule.cpp rule/RemoveProjectDedupBeforeGetDstBySrcRule.cpp rule/PushFilterDownTraverseRule.cpp + rule/OptimizeLeftJoinPredicateRule.cpp ) nebula_add_subdirectory(test) diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp new file mode 100644 index 00000000000..d9af805ae24 --- /dev/null +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp @@ -0,0 +1,167 @@ +// Copyright (c) 2022 vesoft inc. All rights reserved. +// +// This source code is licensed under Apache 2.0 License. 
+ +#include "graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h" + +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" + +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr OptimizeLeftJoinPredicateRule::kInstance = + std::unique_ptr(new OptimizeLeftJoinPredicateRule()); + +OptimizeLeftJoinPredicateRule::OptimizeLeftJoinPredicateRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& OptimizeLeftJoinPredicateRule::pattern() const { + static Pattern pattern = Pattern::create( + PlanNode::Kind::kHashLeftJoin, + {Pattern::create(PlanNode::Kind::kUnknown), + Pattern::create(PlanNode::Kind::kProject, + {Pattern::create(PlanNode::Kind::kAppendVertices, + {Pattern::create(PlanNode::Kind::kTraverse)})})}); + return pattern; +} + +StatusOr OptimizeLeftJoinPredicateRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* leftJoinGroupNode = matched.node; + auto* leftJoinGroup = leftJoinGroupNode->group(); + auto* leftJoin = static_cast(leftJoinGroupNode->node()); + + auto* projectGroupNode = matched.dependencies[1].node; + auto* project = static_cast(projectGroupNode->node()); + + auto* appendVerticesGroupNode = matched.dependencies[1].dependencies[0].node; + auto appendVertices = + static_cast(matched.dependencies[1].dependencies[0].node->node()); + + auto traverse = static_cast( + matched.dependencies[1].dependencies[0].dependencies[0].node->node()); + + auto& avNodeAlias = appendVertices->nodeAlias(); + + auto& tvEdgeAlias = traverse->edgeAlias(); + + auto& leftExprs = leftJoin->hashKeys(); + auto& rightExprs = leftJoin->probeKeys(); + + bool found = false; + size_t rightExprIdx = 0; + for (size_t i = 0; i < rightExprs.size(); ++i) { + auto* rightExpr = rightExprs[i]; + if (rightExpr->kind() != 
Expression::Kind::kFunctionCall) { + continue; + } + auto* func = static_cast(rightExpr); + if (func->name() != "id" && func->name() != "_joinkey") { + continue; + } + auto& args = func->args()->args(); + DCHECK_EQ(args.size(), 1); + auto* arg = args[0]; + if (arg->kind() != Expression::Kind::kInputProperty) { + continue; + } + auto& alias = static_cast(arg)->prop(); + if (alias != avNodeAlias) continue; + // Must check if left exprs contain the same key + if (*leftExprs[i] != *rightExpr) { + return TransformResult::noTransform(); + } + if (found) { + return TransformResult::noTransform(); + } + rightExprIdx = i; + found = true; + } + if (!found) { + return TransformResult::noTransform(); + } + + found = false; + size_t prjIdx = 0; + auto* columns = project->columns(); + for (size_t i = 0; i < columns->size(); ++i) { + const auto* col = columns->columns()[i]; + if (col->expr()->kind() != Expression::Kind::kInputProperty) { + continue; + } + auto* inputProp = static_cast(col->expr()); + if (inputProp->prop() != avNodeAlias) continue; + if (found) { + return TransformResult::noTransform(); + } + prjIdx = i; + found = true; + } + if (!found) { + return TransformResult::noTransform(); + } + + auto* pool = octx->qctx()->objPool(); + // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`, + // and let the new left join use it as right expr + auto* args = ArgumentList::make(pool); + args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias)); + auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args); + + auto oldYieldColumns = project->columns()->columns(); + auto* newYieldColumns = pool->makeAndAdd(); + for (size_t i = 0; i < oldYieldColumns.size(); ++i) { + if (i == prjIdx) { + newYieldColumns->addColumn(new YieldColumn(newPrjExpr, avNodeAlias)); + } else { + newYieldColumns->addColumn(oldYieldColumns[i]->clone().release()); + } + } + auto* newProject = graph::Project::make(octx->qctx(), nullptr, newYieldColumns); + + // 
$-.`avNodeAlias` + auto* newRightExpr = InputPropertyExpression::make(pool, avNodeAlias); + std::vector newRightExprs; + for (size_t i = 0; i < rightExprs.size(); ++i) { + if (i == rightExprIdx) { + newRightExprs.emplace_back(newRightExpr); + } else { + newRightExprs.emplace_back(rightExprs[i]->clone()); + } + } + auto* newLeftJoin = + graph::HashLeftJoin::make(octx->qctx(), nullptr, nullptr, leftExprs, newRightExprs); + + TransformResult result; + result.eraseAll = true; + + newProject->setInputVar(appendVertices->inputVar()); + auto newProjectGroup = OptGroup::create(octx); + auto* newProjectGroupNode = newProjectGroup->makeGroupNode(newProject); + newProjectGroupNode->setDeps(appendVerticesGroupNode->dependencies()); + + newLeftJoin->setLeftVar(leftJoin->leftInputVar()); + newLeftJoin->setRightVar(newProject->outputVar()); + newLeftJoin->setOutputVar(leftJoin->outputVar()); + auto* newLeftJoinGroupNode = OptGroupNode::create(octx, newLeftJoin, leftJoinGroup); + newLeftJoinGroupNode->dependsOn(leftJoinGroupNode->dependencies()[0]); + newLeftJoinGroupNode->dependsOn(newProjectGroup); + + result.newGroupNodes.emplace_back(newLeftJoinGroupNode); + return result; +} + +std::string OptimizeLeftJoinPredicateRule::toString() const { + return "OptimizeLeftJoinPredicateRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h new file mode 100644 index 00000000000..ef27924af2e --- /dev/null +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#pragma once + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { +// Before: +// HashLeftJoin({id(v)}, {id(v)}) +// | | +// ... Project +// | | +// AppendVertices(v) AppendVertices(v) +// | | +// ... 
Traverse(e) +// +// After: +// HashLeftJoin({id(v)}, {$-.v}) +// | | +// ... Project(..., none_direct_dst(e) AS v) +// | | +// AppendVertices(v) Traverse(e) +// | +// ... +// +class OptimizeLeftJoinPredicateRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *qctx, + const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + OptimizeLeftJoinPredicateRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula diff --git a/tests/tck/features/match/MultiQueryParts.feature b/tests/tck/features/match/MultiQueryParts.feature index 31599bcf9f1..c51338e321b 100644 --- a/tests/tck/features/match/MultiQueryParts.feature +++ b/tests/tck/features/match/MultiQueryParts.feature @@ -181,16 +181,16 @@ Feature: Multi Query Parts OPTIONAL MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return * """ Then the result should be, in any order, with relax comparison: - | v1 | v2 | e3 | v4 | v3 | e5 | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu 
Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | + | v1 | v2 | e3 | v4 | v3 | e5 | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 ] | ("Tony Parker") | __NULL__ | __NULL__ | + | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0] | ("Tony Parker") | __NULL__ | __NULL__ | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] | + | ("Tony Parker") | ("Manu Ginobili") | [:like "Manu Ginobili"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 ] | + | ("Tony Parker") | ("Tim Duncan") 
| [:like "Tim Duncan"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] | # The redudant Project after HashLeftJoin is removed now And the execution plan should be: | id | name | dependencies | profiling data | operator info | @@ -203,8 +203,7 @@ Feature: Multi Query Parts | 1 | PassThrough | 3 | | | | 3 | Start | | | | | 14 | Project | 13 | | | - | 13 | AppendVertices | 12 | | | - | 12 | Traverse | 21 | | | + | 13 | Traverse | 21 | | | | 21 | Traverse | 9 | | | | 9 | Dedup | 8 | | | | 8 | PassThrough | 10 | | | diff --git a/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature new file mode 100644 index 00000000000..ab6fbe9e736 --- /dev/null +++ b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature @@ -0,0 +1,64 @@ +# Copyright (c) 2022 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: Optimize left join predicate + + Background: + Given a graph with space named "nba" + + Scenario: optimize left join predicate + When profiling query: + """ + MATCH (person:player)-[:like*1..2]-(friend:player)-[:serve]->(friendTeam:team) + WHERE id(person) == "Tony Parker" AND id(friend) != "Tony Parker" + WITH DISTINCT friend, friendTeam + OPTIONAL MATCH (friend)<-[:like]-(friend2:player)<-[:like]-(friendTeam) + WITH friendTeam, count(friend2) AS numFriends + RETURN + id(friendTeam) AS teamId, + friendTeam.team.name AS teamName, + numFriends + ORDER BY teamName DESC + """ + Then the result should be, in order, with relax comparison: + | teamId | teamName | numFriends | + | "Warriors" | "Warriors" | 0 | + | "Trail Blazers" | "Trail Blazers" | 0 | + | "Thunders" | "Thunders" | 0 | + | "Suns" | "Suns" | 0 | + | "Spurs" | "Spurs" | 0 | + | "Rockets" | "Rockets" | 0 | + | "Raptors" | "Raptors" | 0 | + | "Pistons" | "Pistons" | 0 | + | "Magic" | "Magic" | 0 | + | "Lakers" | "Lakers" | 
0 | + | "Kings" | "Kings" | 0 | + | "Jazz" | "Jazz" | 0 | + | "Hornets" | "Hornets" | 0 | + | "Heat" | "Heat" | 0 | + | "Hawks" | "Hawks" | 0 | + | "Grizzlies" | "Grizzlies" | 0 | + | "Clippers" | "Clippers" | 0 | + | "Celtics" | "Celtics" | 0 | + | "Cavaliers" | "Cavaliers" | 0 | + | "Bulls" | "Bulls" | 0 | + | "76ers" | "76ers" | 0 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 21 | Sort | 18 | | + | 18 | Project | 17 | | + | 17 | Aggregate | 16 | | + | 16 | HashLeftJoin | 10,15 | {"hashKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "probeKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} | + | 10 | Dedup | 28 | | + | 28 | Project | 22 | | + | 22 | Filter | 26 | | + | 26 | AppendVertices | 25 | | + | 25 | Traverse | 24 | | + | 24 | Traverse | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 15 | Project | 14 | {"columns": ["$-.friend AS friend", "$-.friend2 AS friend2", "none_direct_dst($-.__VAR_3) AS friendTeam"]} | + | 14 | Traverse | 12 | | + | 12 | Traverse | 11 | | + | 11 | Argument | | | diff --git a/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature b/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature index 56aed295543..81ef5260791 100644 --- a/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature @@ -65,8 +65,7 @@ Feature: Push Filter down Traverse rule | 2 | Dedup | 1 | | | | 1 | PassThrough | 3 | | | | 3 | Start | | | | - | 15 | Project | 15 | | | - | 30 | AppendVertices | 14 | | | + | 15 | Project | 14 | | | | 14 | Traverse | 12 | | | | 12 | Traverse | 11 | | | | 11 | Argument | | | |