diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index c2b00985975..f5ec5cc28c1 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -27,6 +27,7 @@ nebula_add_library( rule/PushFilterDownAggregateRule.cpp rule/PushFilterDownProjectRule.cpp rule/PushFilterDownLeftJoinRule.cpp + rule/PushFilterDownInnerJoinRule.cpp rule/PushFilterDownNodeRule.cpp rule/PushFilterDownScanVerticesRule.cpp rule/PushVFilterDownScanVerticesRule.cpp diff --git a/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp new file mode 100644 index 00000000000..566d5d1be1c --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp @@ -0,0 +1,134 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "graph/optimizer/rule/PushFilterDownInnerJoinRule.h" + +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" + +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr PushFilterDownInnerJoinRule::kInstance = + std::unique_ptr(new PushFilterDownInnerJoinRule()); + +PushFilterDownInnerJoinRule::PushFilterDownInnerJoinRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& PushFilterDownInnerJoinRule::pattern() const { + static Pattern pattern = Pattern::create(graph::PlanNode::Kind::kFilter, + {Pattern::create(graph::PlanNode::Kind::kInnerJoin)}); + return pattern; +} + +StatusOr PushFilterDownInnerJoinRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* filterGroupNode = matched.node; + auto* oldFilterNode = filterGroupNode->node(); + auto deps = matched.dependencies; + DCHECK_EQ(deps.size(), 1); + auto innerJoinGroupNode = deps.front().node; + auto* innerJoinNode = innerJoinGroupNode->node(); + DCHECK_EQ(oldFilterNode->kind(), PlanNode::Kind::kFilter); + DCHECK_EQ(innerJoinNode->kind(), PlanNode::Kind::kInnerJoin); + auto* oldInnerJoinNode = static_cast(innerJoinNode); + const auto* condition = static_cast(oldFilterNode)->condition(); + DCHECK(condition); + const std::pair& leftVar = oldInnerJoinNode->leftVar(); + auto symTable = octx->qctx()->symTable(); + std::vector leftVarColNames = symTable->getVar(leftVar.first)->colNames; + + // split the `condition` based on whether the varPropExpr comes from the left + // child + auto picker = [&leftVarColNames](const Expression* e) -> bool { + auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); + if (varProps.empty()) { + return false; + } + std::vector propNames; + for (auto* expr : varProps) { + DCHECK(expr->kind() == Expression::Kind::kVarProperty); + propNames.emplace_back(static_cast(expr)->prop()); + } + for (auto prop : propNames) { + auto iter = std::find_if(leftVarColNames.begin(), + leftVarColNames.end(), + [&prop](std::string item) { return !item.compare(prop); }); + if (iter == leftVarColNames.end()) { + return false; + } + } + return true; + }; + Expression* filterPicked = nullptr; + Expression* filterUnpicked = nullptr; + graph::ExpressionUtils::splitFilter(condition, picker, &filterPicked, &filterUnpicked); + + if (!filterPicked) { + return TransformResult::noTransform(); + } + + // produce new left Filter node + auto* newLeftFilterNode = + graph::Filter::make(octx->qctx(), + const_cast(oldInnerJoinNode->dep()), + graph::ExpressionUtils::rewriteInnerVar(filterPicked, leftVar.first)); + newLeftFilterNode->setInputVar(leftVar.first); + newLeftFilterNode->setColNames(leftVarColNames); + auto newFilterGroup = OptGroup::create(octx); + auto newFilterGroupNode = newFilterGroup->makeGroupNode(newLeftFilterNode); + for (auto dep : innerJoinGroupNode->dependencies()) { + newFilterGroupNode->dependsOn(dep); + } + auto newLeftFilterOutputVar = newLeftFilterNode->outputVar(); + + // produce new InnerJoin node + auto* newInnerJoinNode = static_cast(oldInnerJoinNode->clone()); + newInnerJoinNode->setLeftVar({newLeftFilterOutputVar, 0}); + const std::vector& hashKeys = oldInnerJoinNode->hashKeys(); + std::vector newHashKeys; + for (auto* k : hashKeys) { + newHashKeys.emplace_back(graph::ExpressionUtils::rewriteInnerVar(k, newLeftFilterOutputVar)); + } + newInnerJoinNode->setHashKeys(newHashKeys); + + TransformResult result; + result.eraseAll = true; + if (filterUnpicked) { + auto* newAboveFilterNode = graph::Filter::make(octx->qctx(), newInnerJoinNode); + newAboveFilterNode->setOutputVar(oldFilterNode->outputVar()); + newAboveFilterNode->setCondition(filterUnpicked); + auto newAboveFilterGroupNode = + OptGroupNode::create(octx, newAboveFilterNode, filterGroupNode->group()); + + auto newInnerJoinGroup = OptGroup::create(octx); + auto newInnerJoinGroupNode = newInnerJoinGroup->makeGroupNode(newInnerJoinNode); + newAboveFilterGroupNode->setDeps({newInnerJoinGroup}); + newInnerJoinGroupNode->setDeps({newFilterGroup}); + result.newGroupNodes.emplace_back(newAboveFilterGroupNode); + } else { + newInnerJoinNode->setOutputVar(oldFilterNode->outputVar()); + newInnerJoinNode->setColNames(oldInnerJoinNode->colNames()); + auto newInnerJoinGroupNode = + OptGroupNode::create(octx, newInnerJoinNode, filterGroupNode->group()); + newInnerJoinGroupNode->setDeps({newFilterGroup}); + result.newGroupNodes.emplace_back(newInnerJoinGroupNode); + } + return result; +} + +std::string PushFilterDownInnerJoinRule::toString() const { + return "PushFilterDownInnerJoinRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.h b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.h new file mode 100644 index 00000000000..e003e346a75 --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNINNERJOINRULE_H_ +#define GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNINNERJOINRULE_H_ + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +// Push down the filter items from the left subplan of [[InnerJoin]] +// Required conditions: +// 1. Match the pattern +// Benefits: +// 1. Filter data early to optimize performance +// +// Tranformation: +// Before: +// +// +-----------+-----------+ +// | Filter | +// | ($left>3 and $right<4)| +// +-----------+-----------+ +// | +// +------+------+ +// | InnerJoin | +// +------+------+ +// +// After: +// +// +------+------+ +// | Filter | +// | ($right<4) | +// +------+------+ +// | +// +------+------+ +// | InnerJoin | +// +------+------+ +// / +// +------+------+ +// | Filter | +// | ($left>3) | +// +------+------+ + +class PushFilterDownInnerJoinRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *qctx, + const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + PushFilterDownInnerJoinRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNINNERJOINRULE_H_ diff --git a/tests/tck/features/optimizer/PushFilterDownInnerJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownInnerJoinRule.feature new file mode 100644 index 00000000000..c74487c6741 --- /dev/null +++ b/tests/tck/features/optimizer/PushFilterDownInnerJoinRule.feature @@ -0,0 +1,31 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: Push Filter down InnerJoin rule + + Background: + Given a graph with space named "nba" + + Scenario: push filter down InnerJoin + When profiling query: + """ + LOOKUP ON player WHERE player.name == "Tony Parker" + YIELD id(vertex) as id | + GO FROM $-.id OVER like + WHERE (like.likeness - 1) >= 0 + YIELD like._src AS src_id, like._dst AS dst_id, like.likeness AS likeness + """ + Then the result should be, in any order: + | src_id | dst_id | likeness | + | "Tony Parker" | "LaMarcus Aldridge" | 90 | + | "Tony Parker" | "Manu Ginobili" | 95 | + | "Tony Parker" | "Tim Duncan" | 95 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 10 | Project | 15 | | + | 15 | InnerJoin | 17 | | + | 17 | Project | 18 | | + | 18 | GetNeighbors | 3 | | + | 3 | Project | 11 | | + | 11 | TagIndexPrefixScan | 0 | | + | 0 | Start | | | diff --git a/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature index 0eb9994526b..74a4cdabb4c 100644 --- a/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature @@ -22,23 +22,23 @@ Feature: Push Filter down LeftJoin rule | "Tim Duncan" | And the execution plan should be: | id | name | dependencies | operator info | - | 24 | Project | 23 | | - | 23 | Filter | 22 | | - | 22 | InnerJoin | 21 | | + | 24 | Project | 34 | | + | 34 | InnerJoin | 33 | | + | 33 | Filter | 21 | | | 21 | LeftJoin | 20 | | | 20 | Project | 19 | | | 19 | GetVertices | 18 | | - | 18 | Project | 31 | | - | 31 | GetNeighbors | 14 | | - | 14 | Project | 13 | | - | 13 | Filter | 12 | | - | 12 | InnerJoin | 11 | | + | 18 | Project | 30 | | + | 30 | GetNeighbors | 14 | | + | 14 | Project | 32 | | + | 32 | InnerJoin | 31 | | + | 31 | Filter | 11 | | | 11 | LeftJoin | 10 | | | 10 | Project | 9 | | | 9 | GetVertices | 8 | | - | 8 | Project | 30 | | - | 30 | GetNeighbors | 27 | | - | 27 | Project | 25 | | + | 8 | Project | 29 | | + | 29 | GetNeighbors | 26 | | + | 26 | Project | 25 | | | 25 | TagIndexPrefixScan | 0 | | | 0 | Start | | | When profiling query: