diff --git a/src/graph/context/Symbols.cpp b/src/graph/context/Symbols.cpp index e360ee897d1..83a493715b2 100644 --- a/src/graph/context/Symbols.cpp +++ b/src/graph/context/Symbols.cpp @@ -81,14 +81,6 @@ bool SymbolTable::deleteWrittenBy(const std::string& varName, PlanNode* node) { if (var == vars_.end()) { return false; } - for (auto& alias : var->second->colNames) { - auto found = aliasGeneratedBy_.find(alias); - if (found != aliasGeneratedBy_.end()) { - if (found->second == varName) { - aliasGeneratedBy_.erase(alias); - } - } - } var->second->writtenBy.erase(node); return true; } @@ -106,6 +98,7 @@ bool SymbolTable::updateWrittenBy(const std::string& oldVar, } Variable* SymbolTable::getVar(const std::string& varName) { + DCHECK(!varName.empty()) << "the variable name is empty"; auto var = vars_.find(varName); if (var == vars_.end()) { return nullptr; @@ -114,22 +107,5 @@ Variable* SymbolTable::getVar(const std::string& varName) { } } -void SymbolTable::setAliasGeneratedBy(const std::vector& aliases, - const std::string& varName) { - for (auto& alias : aliases) { - if (aliasGeneratedBy_.count(alias) == 0) { - aliasGeneratedBy_.emplace(alias, varName); - } - } -} - -StatusOr SymbolTable::getAliasGeneratedBy(const std::string& alias) { - auto found = aliasGeneratedBy_.find(alias); - if (found == aliasGeneratedBy_.end()) { - return Status::Error("Not found a variable that generates the alias: %s", alias.c_str()); - } else { - return found->second; - } -} } // namespace graph } // namespace nebula diff --git a/src/graph/context/Symbols.h b/src/graph/context/Symbols.h index fb30d957770..bcf3b44b815 100644 --- a/src/graph/context/Symbols.h +++ b/src/graph/context/Symbols.h @@ -72,10 +72,6 @@ class SymbolTable final { Variable* getVar(const std::string& varName); - void setAliasGeneratedBy(const std::vector& aliases, const std::string& varName); - - StatusOr getAliasGeneratedBy(const std::string& alias); - std::string toString() const; private: @@ -85,8 +81,6 @@ class SymbolTable final { ExecutionContext* ectx_{nullptr}; // var name -> variable std::unordered_map vars_; - // alias -> first variable that generate the alias - std::unordered_map aliasGeneratedBy_; }; } // namespace graph diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index 37f4720bdf4..b27c20e97aa 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -27,6 +27,7 @@ nebula_add_library( rule/PushFilterDownAggregateRule.cpp rule/PushFilterDownProjectRule.cpp rule/PushFilterDownLeftJoinRule.cpp + rule/PushFilterDownHashInnerJoinRule.cpp rule/PushFilterDownInnerJoinRule.cpp rule/PushFilterDownNodeRule.cpp rule/PushFilterDownScanVerticesRule.cpp diff --git a/src/graph/optimizer/OptGroup.cpp b/src/graph/optimizer/OptGroup.cpp index 32f2a2ab0d8..340881b224d 100644 --- a/src/graph/optimizer/OptGroup.cpp +++ b/src/graph/optimizer/OptGroup.cpp @@ -41,9 +41,81 @@ OptGroup::OptGroup(OptContext *ctx) noexcept : ctx_(ctx) { DCHECK(ctx != nullptr); } +Status OptGroup::validateSubPlan(const OptGroupNode *gn, + const OptRule *rule, + const std::vector &patternLeaves) const { + auto &deps = DCHECK_NOTNULL(gn)->dependencies(); + + auto checkDepGroup = [this, gn, rule, &patternLeaves](const OptGroup *depGroup) -> Status { + auto iter = std::find(patternLeaves.begin(), patternLeaves.end(), depGroup); + if (iter != patternLeaves.end()) { + return Status::OK(); + } + if (!depGroup) { + return Status::Error("Could not find the dependent group in pattern leaves"); + } + if (depGroup->groupNodes_.size() != 1U || depGroup->groupNodesReferenced_.size() != 1U) { + return Status::Error( + "Invalid sub-plan generated when applying the rule: %s, " + "planNode: %s, numGroupNodes: %lu, numGroupNodesRef: %lu", + rule->toString().c_str(), + PlanNode::toString(gn->node()->kind()), + depGroup->groupNodes_.size(), + depGroup->groupNodesReferenced_.size()); + } + return validateSubPlan(depGroup->groupNodes_.front(), rule, patternLeaves); + }; + + switch (deps.size()) { + case 0: { + auto iter = std::find(patternLeaves.begin(), patternLeaves.end(), nullptr); + if (iter == patternLeaves.end()) { + return Status::Error("Invalid sub-plan generated by the rule: %s, planNode: %s", + rule->toString().c_str(), + PlanNode::toString(gn->node()->kind())); + } + break; + } + case 1: { + NG_RETURN_IF_ERROR(checkDepGroup(deps[0])); + break; + } + case 2: { + NG_RETURN_IF_ERROR(checkDepGroup(deps[0])); + NG_RETURN_IF_ERROR(checkDepGroup(deps[1])); + break; + } + default: { + return Status::Error("Invalid dependencies of opt group node: %lu", deps.size()); + } + } + return Status::OK(); +} + +Status OptGroup::validate(const OptRule *rule) const { + if (groupNodes_.empty() && !groupNodesReferenced_.empty()) { + return Status::Error( + "The OptGroup has no any OptGroupNode but used by other OptGroupNode " + "when applying the rule: %s, numGroupNodesRef: %lu", + rule->toString().c_str(), + groupNodesReferenced_.size()); + } + for (auto *gn : groupNodes_) { + NG_RETURN_IF_ERROR(gn->validate(rule)); + if (gn->node()->outputVar() != outputVar_) { + return Status::Error( + "The output columns of the OptGroupNode is different from OptGroup " + "when applying the rule: %s, %s vs. %s", + rule->toString().c_str(), + gn->node()->outputVar().c_str(), + outputVar_.c_str()); + } + } + return Status::OK(); +} + void OptGroup::addGroupNode(OptGroupNode *groupNode) { - DCHECK(groupNode != nullptr); - DCHECK(groupNode->group() == this); + DCHECK_EQ(this, DCHECK_NOTNULL(groupNode)->group()); if (outputVar_.empty()) { outputVar_ = groupNode->node()->outputVar(); } else { @@ -56,13 +128,9 @@ void OptGroup::addGroupNode(OptGroupNode *groupNode) { } OptGroupNode *OptGroup::makeGroupNode(PlanNode *node) { - if (outputVar_.empty()) { - outputVar_ = node->outputVar(); - } else { - DCHECK_EQ(outputVar_, node->outputVar()); - } - groupNodes_.emplace_back(OptGroupNode::create(ctx_, node, this)); - return groupNodes_.back(); + auto *gn = OptGroupNode::create(ctx_, node, this); + addGroupNode(gn); + return gn; } Status OptGroup::explore(const OptRule *rule) { @@ -71,9 +139,12 @@ Status OptGroup::explore(const OptRule *rule) { } setExplored(rule); + // TODO(yee): the opt group maybe in the loop body branch + // DCHECK(isRootGroup_ || !groupNodesReferenced_.empty()) + // << "Current group should be referenced by other group nodes before optimization"; + for (auto iter = groupNodes_.begin(); iter != groupNodes_.end();) { - auto groupNode = *iter; - DCHECK(groupNode != nullptr); + auto *groupNode = DCHECK_NOTNULL(*iter); if (groupNode->isExplored(rule)) { ++iter; continue; @@ -82,7 +153,7 @@ Status OptGroup::explore(const OptRule *rule) { NG_RETURN_IF_ERROR(groupNode->explore(rule)); // Find more equivalents - std::vector boundary; + std::vector leaves; auto status = rule->match(ctx_, groupNode); if (!status.ok()) { ++iter; @@ -90,15 +161,25 @@ Status OptGroup::explore(const OptRule *rule) { } ctx_->setChanged(true); auto matched = std::move(status).value(); - matched.collectBoundary(boundary); + matched.collectPatternLeaves(leaves); auto resStatus = rule->transform(ctx_, matched); NG_RETURN_IF_ERROR(resStatus); auto result = std::move(resStatus).value(); + + for (auto *gn : result.newGroupNodes) { + auto it = std::find(groupNodes_.begin(), groupNodes_.end(), gn); + if (it != groupNodes_.end()) { + return Status::Error("Should not push the OptGroupNode %s into the group in the rule: %s", + gn->node()->toString().c_str(), + rule->toString().c_str()); + } + } + // In some cases, we can apply optimization rules even if the control flow and data flow are // inconsistent. For now, let the optimization rules themselves guarantee correctness. if (result.eraseAll) { for (auto gnode : groupNodes_) { - gnode->node()->releaseSymbols(); + gnode->release(); } groupNodes_.clear(); for (auto ngn : result.newGroupNodes) { @@ -116,13 +197,16 @@ Status OptGroup::explore(const OptRule *rule) { } if (result.eraseCurr) { - (*iter)->node()->releaseSymbols(); + (*iter)->release(); iter = groupNodes_.erase(iter); } else { ++iter; } } + DCHECK(!groupNodes_.empty()) + << "Should have at least one group node after optimizing the current group"; + return Status::OK(); } @@ -139,6 +223,7 @@ Status OptGroup::exploreUntilMaxRound(const OptRule *rule) { } std::pair OptGroup::findMinCostGroupNode() const { + DCHECK(!groupNodes_.empty()) << "There is no any group nodes in opt group"; double minCost = std::numeric_limits::max(); const OptGroupNode *minGroupNode = nullptr; for (auto &groupNode : groupNodes_) { @@ -157,8 +242,19 @@ double OptGroup::getCost() const { const PlanNode *OptGroup::getPlan() const { const OptGroupNode *minGroupNode = findMinCostGroupNode().second; - DCHECK(minGroupNode != nullptr); - return minGroupNode->getPlan(); + return DCHECK_NOTNULL(minGroupNode)->getPlan(); +} + +void OptGroup::deleteRefGroupNode(const OptGroupNode *node) { + groupNodesReferenced_.erase(node); + if (groupNodesReferenced_.empty()) { + // Cleanup all opt group nodes in current opt group if it's NOT referenced by any other opt + // group nodes + for (auto *n : groupNodes_) { + n->release(); + } + groupNodes_.clear(); + } } OptGroupNode *OptGroupNode::create(OptContext *ctx, PlanNode *node, const OptGroup *group) { @@ -226,5 +322,49 @@ const PlanNode *OptGroupNode::getPlan() const { return node_; } +void OptGroupNode::release() { + node_->releaseSymbols(); + for (auto *dep : dependencies_) { + dep->deleteRefGroupNode(this); + } +} + +Status OptGroupNode::validate(const OptRule *rule) const { + if (!node_) { + return Status::Error("The OptGroupNode does not have plan node when applying the rule: %s", + rule->toString().c_str()); + } + if (!group_) { + return Status::Error( + "The OptGroupNode does not have the right OptGroup when applying the rule: %s", + rule->toString().c_str()); + } + if (!bodies_.empty()) { + if (node_->kind() != PlanNode::Kind::kLoop) { + return Status::Error( + "The plan node is not Loop in OptGroupNode when applying the rule: %s, planNode: %s", + rule->toString().c_str(), + PlanNode::toString(node_->kind())); + } + for (auto *g : bodies_) { + NG_RETURN_IF_ERROR(g->validate(rule)); + } + } + if (dependencies_.empty()) { + if (node_->kind() != PlanNode::Kind::kStart && node_->kind() != PlanNode::Kind::kArgument) { + return Status::Error( + "The leaf plan node is not Start or Argument in OptGroupNode when applying the rule: %s, " + "planNode: %s", + rule->toString().c_str(), + PlanNode::toString(node_->kind())); + } + } else { + for (auto *g : dependencies_) { + NG_RETURN_IF_ERROR(g->validate(rule)); + } + } + return Status::OK(); +} + } // namespace opt } // namespace nebula diff --git a/src/graph/optimizer/OptGroup.h b/src/graph/optimizer/OptGroup.h index 276bcc75bb8..1f264928229 100644 --- a/src/graph/optimizer/OptGroup.h +++ b/src/graph/optimizer/OptGroup.h @@ -48,6 +48,18 @@ class OptGroup final { return outputVar_; } + void addRefGroupNode(const OptGroupNode *node) { + groupNodesReferenced_.insert(node); + } + + void deleteRefGroupNode(const OptGroupNode *node); + + void setRootGroup() { + isRootGroup_ = true; + } + + Status validate(const OptRule *rule) const; + private: friend ObjectPool; explicit OptGroup(OptContext *ctx) noexcept; @@ -55,12 +67,19 @@ class OptGroup final { static constexpr int16_t kMaxExplorationRound = 128; std::pair findMinCostGroupNode() const; + Status validateSubPlan(const OptGroupNode *gn, + const OptRule *rule, + const std::vector &patternLeaves) const; OptContext *ctx_{nullptr}; std::list groupNodes_; std::vector exploredRules_; // The output variable should be same across the whole group. std::string outputVar_; + + bool isRootGroup_{false}; + // Save the OptGroupNode which references this OptGroup + std::unordered_set groupNodesReferenced_; }; class OptGroupNode final { @@ -69,6 +88,7 @@ class OptGroupNode final { void dependsOn(OptGroup *dep) { dependencies_.emplace_back(dep); + dep->addRefGroupNode(this); } const std::vector &dependencies() const { @@ -77,6 +97,9 @@ class OptGroupNode final { void setDeps(std::vector deps) { dependencies_ = deps; + for (auto *dep : deps) { + dep->addRefGroupNode(this); + } } void addBody(OptGroup *body) { @@ -109,6 +132,11 @@ class OptGroupNode final { double getCost() const; const graph::PlanNode *getPlan() const; + // Release the opt group node from its opt group + void release(); + + Status validate(const OptRule *rule) const; + private: friend ObjectPool; OptGroupNode(graph::PlanNode *node, const OptGroup *group) noexcept; diff --git a/src/graph/optimizer/OptRule.cpp b/src/graph/optimizer/OptRule.cpp index 1b7eab7ecc9..23c693c2fd0 100644 --- a/src/graph/optimizer/OptRule.cpp +++ b/src/graph/optimizer/OptRule.cpp @@ -17,8 +17,13 @@ namespace nebula { namespace opt { const PlanNode *MatchedResult::planNode(const std::vector &pos) const { + auto *pnode = DCHECK_NOTNULL(result(pos).node)->node(); + return DCHECK_NOTNULL(pnode); +} + +const MatchedResult &MatchedResult::result(const std::vector &pos) const { if (pos.empty()) { - return DCHECK_NOTNULL(node)->node(); + return *this; } DCHECK_EQ(pos[0], 0); @@ -28,15 +33,20 @@ const PlanNode *MatchedResult::planNode(const std::vector &pos) const { DCHECK_LT(pos[i], result->dependencies.size()); result = &result->dependencies[pos[i]]; } - return DCHECK_NOTNULL(result->node)->node(); + return *DCHECK_NOTNULL(result); } -void MatchedResult::collectBoundary(std::vector &boundary) const { +void MatchedResult::collectPatternLeaves(std::vector &leaves) const { if (dependencies.empty()) { - boundary.insert(boundary.end(), node->dependencies().begin(), node->dependencies().end()); + if (node->dependencies().empty()) { + // nullptr means this node in matched pattern is a leaf node + leaves.push_back(nullptr); + } else { + leaves.insert(leaves.end(), node->dependencies().begin(), node->dependencies().end()); + } } else { for (const auto &dep : dependencies) { - dep.collectBoundary(boundary); + dep.collectPatternLeaves(leaves); } } } @@ -93,8 +103,9 @@ bool OptRule::TransformResult::checkDataFlow(const std::vector &boun }); } -/*static*/ bool OptRule::TransformResult::checkDataFlow(const OptGroupNode *groupNode, - const std::vector &boundary) { +/*static*/ +bool OptRule::TransformResult::checkDataFlow(const OptGroupNode *groupNode, + const std::vector &boundary) { const auto &deps = groupNode->dependencies(); // reach the boundary if (std::all_of(deps.begin(), deps.end(), [&boundary](OptGroup *dep) { @@ -110,13 +121,26 @@ bool OptRule::TransformResult::checkDataFlow(const std::vector &boun const auto *node = groupNode->node(); if (node->inputVars().size() == deps.size()) { // Don't check when count of dependencies is different from count of input variables + auto checkNumReadBy = [](const graph::Variable *v) -> bool { + switch (v->readBy.size()) { + case 1: + return true; + case 2: + // There is at least one Argument plan node if this variable read by 2 other nodes + return std::any_of(v->readBy.begin(), v->readBy.end(), [](auto *n) { + return n->kind() == graph::PlanNode::Kind::kArgument; + }); + default: + return false; + } + }; for (std::size_t i = 0; i < deps.size(); i++) { const OptGroup *dep = deps[i]; if (node->inputVar(i) != dep->outputVar()) { return false; } // Only use by father plan node - if (node->inputVars()[i]->readBy.size() != 1) { + if (!checkNumReadBy(node->inputVars()[i])) { return false; } return std::all_of( @@ -159,7 +183,8 @@ bool OptRule::checkDataflowDeps(OptContext *ctx, if (!isRoot) { for (auto pnode : outVar->readBy) { auto optGNode = ctx->findOptGroupNodeByPlanNodeId(pnode->id()); - if (!optGNode) continue; + // Ignore the data dependency introduced by Argument plan node + if (!optGNode || optGNode->node()->kind() == graph::PlanNode::Kind::kArgument) continue; const auto &deps = optGNode->dependencies(); auto found = std::find(deps.begin(), deps.end(), node->group()); if (found == deps.end()) { diff --git a/src/graph/optimizer/OptRule.h b/src/graph/optimizer/OptRule.h index f19013e7489..1a0e0d65046 100644 --- a/src/graph/optimizer/OptRule.h +++ b/src/graph/optimizer/OptRule.h @@ -40,8 +40,9 @@ struct MatchedResult { // {0, 1, 0} | this->dependencies[1].dependencies[0] // {0, 1, 0, 1} | this->dependencies[1].dependencies[0].dependencies[1] const graph::PlanNode *planNode(const std::vector &pos = {}) const; + const MatchedResult &result(const std::vector &pos = {}) const; - void collectBoundary(std::vector &boundary) const; + void collectPatternLeaves(std::vector &leaves) const; }; // Match plan node by trait or kind of plan node. @@ -52,8 +53,13 @@ class MatchNode { : node_(std::move(kinds)) {} bool match(const graph::PlanNode *node) const { - auto find = node_.find(node->kind()); - return find != node_.end(); + for (auto kind : node_) { + // Any plan node is expected if the kind of node is unknown + if (kind == node->kind() || kind == graph::PlanNode::Kind::kUnknown) { + return true; + } + } + return false; } private: diff --git a/src/graph/optimizer/Optimizer.cpp b/src/graph/optimizer/Optimizer.cpp index 7427ad76d4c..b6909361669 100644 --- a/src/graph/optimizer/Optimizer.cpp +++ b/src/graph/optimizer/Optimizer.cpp @@ -39,6 +39,7 @@ StatusOr Optimizer::findBestPlan(QueryContext *qctx) { auto ret = prepare(optCtx.get(), root); NG_RETURN_IF_ERROR(ret); auto rootGroup = std::move(ret).value(); + rootGroup->setRootGroup(); NG_RETURN_IF_ERROR(doExploration(optCtx.get(), rootGroup)); auto *newRoot = rootGroup->getPlan(); @@ -52,6 +53,7 @@ StatusOr Optimizer::findBestPlan(QueryContext *qctx) { // Just for Properties Pruning Status Optimizer::postprocess(PlanNode *root, graph::QueryContext *qctx, GraphSpaceID spaceID) { + NG_RETURN_IF_ERROR(rewriteArgumentInputVar(root)); if (FLAGS_enable_optimizer_property_pruner_rule) { graph::PropertyTracker propsUsed; graph::PrunePropertiesVisitor visitor(propsUsed, qctx, spaceID); @@ -79,6 +81,7 @@ Status Optimizer::doExploration(OptContext *octx, OptGroup *rootGroup) { for (auto rule : ruleSet->rules()) { // Explore until the maximum number of iterations(Rules) is reached NG_RETURN_IF_ERROR(rootGroup->exploreUntilMaxRound(rule)); + NG_RETURN_IF_ERROR(rootGroup->validate(rule)); rootGroup->setUnexplored(rule); } } @@ -125,5 +128,100 @@ void Optimizer::addBodyToGroupNode(OptContext *ctx, gnode->addBody(body); } +namespace { + +// The plan node referenced by argument always is in the left side of plan tree. So we only need to +// check whether the left root child of binary input plan node contains what the argument needs in +// its output columns +bool findArgumentRefPlanNodeInPath(const std::vector &path, PlanNode *argument) { + DCHECK_EQ(argument->kind(), PlanNode::Kind::kArgument); + for (int i = path.size() - 1; i >= 0; i--) { + const auto *pn = path[i]; + if (pn->isBiInput()) { + DCHECK_LT(i, path.size() - 1); + const auto *bpn = static_cast(pn); + if (bpn->right() == path[i + 1]) { + // Argument is in the right side dependency of binary plan node, check the left child + // output columns + if (argument->isColumnsIncludedIn(bpn->left())) { + argument->setInputVar(bpn->left()->outputVar()); + return true; + } + } else { + // Argument is in the left side dependency of binary plan node, continue to find + // next parent plan node + DCHECK_EQ(bpn->left(), path[i + 1]); + } + } + } + return false; +} + +Status rewriteArgumentInputVarInternal(PlanNode *root, + uint16_t stackDepth, + bool &hasArgument, + std::vector &path) { + const uint16_t kMaxStackDepth = 512u; + if (stackDepth > kMaxStackDepth) { + return Status::Error("The depth of plan tree has exceeded the max %u level", kMaxStackDepth); + } + stackDepth++; + + if (!root) return Status::OK(); + + path.push_back(root); + switch (root->numDeps()) { + case 0: { + if (root->kind() == PlanNode::Kind::kArgument) { + hasArgument = true; + DCHECK(root->inputVar().empty()) + << "Should keep the input empty for argument when plan generation"; + + if (!findArgumentRefPlanNodeInPath(path, root) || root->inputVar().empty()) { + return Status::Error("Could not find the right input variable for argument plan node"); + } + } + break; + } + case 1: { + auto *dep = const_cast(root->dep()); + NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(dep, stackDepth, hasArgument, path)); + break; + } + case 2: { + auto *bpn = static_cast(root); + auto *left = const_cast(bpn->left()); + NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(left, stackDepth, hasArgument, path)); + auto *right = const_cast(bpn->right()); + NG_RETURN_IF_ERROR(rewriteArgumentInputVarInternal(right, stackDepth, hasArgument, path)); + break; + } + default: { + return Status::Error( + "Invalid dependencies of plan node `%s': %lu", root->toString().c_str(), root->numDeps()); + } + } + path.pop_back(); + + // Ensure that there's no argument plan node if loop/select plan nodes exist in execution plan + if (root->kind() == PlanNode::Kind::kLoop || root->kind() == PlanNode::Kind::kSelect) { + if (hasArgument) { + return Status::Error( + "Loop/Select plan node should not exist with argument in the same execution plan"); + } + } + + return Status::OK(); +} + +} // namespace + +// static +Status Optimizer::rewriteArgumentInputVar(PlanNode *root) { + bool hasArgument = false; + std::vector path; + return rewriteArgumentInputVarInternal(root, 0, hasArgument, path); +} + } // namespace opt } // namespace nebula diff --git a/src/graph/optimizer/Optimizer.h b/src/graph/optimizer/Optimizer.h index b09a48ef4a8..8cf4f842ec1 100644 --- a/src/graph/optimizer/Optimizer.h +++ b/src/graph/optimizer/Optimizer.h @@ -47,6 +47,8 @@ class Optimizer final { OptGroupNode *gnode, std::unordered_map *visited); + static Status rewriteArgumentInputVar(graph::PlanNode *root); + static constexpr int8_t kMaxIterationRound = 5; std::vector ruleSets_; diff --git a/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp b/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp index 315b9237d3d..b15ae22f350 100644 --- a/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownAggregateRule.cpp @@ -52,20 +52,16 @@ StatusOr PushFilterDownAggregateRule::transform( if (varProps.empty()) { return TransformResult::noTransform(); } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK_EQ(expr->kind(), Expression::Kind::kVarProperty); - propNames.emplace_back(static_cast(expr)->prop()); - } std::unordered_map rewriteMap; auto colNames = newAggNode->colNames(); DCHECK_EQ(newAggNode->colNames().size(), newAggNode->groupItems().size()); for (size_t i = 0; i < colNames.size(); ++i) { auto& colName = colNames[i]; - auto iter = std::find_if(propNames.begin(), propNames.end(), [&colName](const auto& name) { - return !colName.compare(name); + auto iter = std::find_if(varProps.begin(), varProps.end(), [&colName](const auto* expr) { + DCHECK_EQ(expr->kind(), Expression::Kind::kVarProperty); + return !colName.compare(static_cast(expr)->prop()); }); - if (iter == propNames.end()) continue; + if (iter == varProps.end()) continue; if (graph::ExpressionUtils::findAny(groupItems[i], {Expression::Kind::kAggregate})) { return TransformResult::noTransform(); } diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp new file mode 100644 index 00000000000..92b2513f80f --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.cpp @@ -0,0 +1,136 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h" + +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" + +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr PushFilterDownHashInnerJoinRule::kInstance = + std::unique_ptr(new PushFilterDownHashInnerJoinRule()); + +PushFilterDownHashInnerJoinRule::PushFilterDownHashInnerJoinRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& PushFilterDownHashInnerJoinRule::pattern() const { + static Pattern pattern = Pattern::create( + PlanNode::Kind::kFilter, + {Pattern::create( + PlanNode::Kind::kHashInnerJoin, + {Pattern::create(PlanNode::Kind::kUnknown), Pattern::create(PlanNode::Kind::kUnknown)})}); + return pattern; +} + +StatusOr PushFilterDownHashInnerJoinRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* filterGroupNode = matched.node; + auto* oldFilterNode = filterGroupNode->node(); + DCHECK_EQ(oldFilterNode->kind(), PlanNode::Kind::kFilter); + + auto* innerJoinNode = matched.planNode({0, 0}); + DCHECK_EQ(innerJoinNode->kind(), PlanNode::Kind::kHashInnerJoin); + auto* oldInnerJoinNode = static_cast(innerJoinNode); + + const auto* condition = static_cast(oldFilterNode)->condition(); + DCHECK(condition); + + const auto& leftResult = matched.result({0, 0, 0}); + const auto& rightResult = matched.result({0, 0, 1}); + + Expression *leftFilterUnpicked = nullptr, *rightFilterUnpicked = nullptr; + OptGroup* leftGroup = pushFilterDownChild(octx, leftResult, condition, &leftFilterUnpicked); + OptGroup* rightGroup = + pushFilterDownChild(octx, rightResult, leftFilterUnpicked, &rightFilterUnpicked); + + if (!leftGroup && !rightGroup) { + return TransformResult::noTransform(); + } + + leftGroup = leftGroup ? leftGroup : const_cast(leftResult.node->group()); + rightGroup = rightGroup ? rightGroup : const_cast(rightResult.node->group()); + + // produce new InnerJoin node + auto* newInnerJoinNode = static_cast(oldInnerJoinNode->clone()); + auto newJoinGroup = rightFilterUnpicked ? OptGroup::create(octx) : filterGroupNode->group(); + // TODO(yee): it's too tricky + auto newGroupNode = rightFilterUnpicked + ? const_cast(newJoinGroup)->makeGroupNode(newInnerJoinNode) + : OptGroupNode::create(octx, newInnerJoinNode, newJoinGroup); + newGroupNode->dependsOn(leftGroup); + newGroupNode->dependsOn(rightGroup); + newInnerJoinNode->setLeftVar(leftGroup->outputVar()); + newInnerJoinNode->setRightVar(rightGroup->outputVar()); + + if (rightFilterUnpicked) { + auto newFilterNode = graph::Filter::make(octx->qctx(), nullptr, rightFilterUnpicked); + newFilterNode->setOutputVar(oldFilterNode->outputVar()); + newFilterNode->setColNames(oldFilterNode->colNames()); + newFilterNode->setInputVar(newInnerJoinNode->outputVar()); + newGroupNode = OptGroupNode::create(octx, newFilterNode, filterGroupNode->group()); + newGroupNode->dependsOn(const_cast(newJoinGroup)); + } else { + newInnerJoinNode->setOutputVar(oldFilterNode->outputVar()); + newInnerJoinNode->setColNames(oldFilterNode->colNames()); + } + + TransformResult result; + result.eraseAll = true; + result.newGroupNodes.emplace_back(newGroupNode); + return result; +} + +OptGroup* PushFilterDownHashInnerJoinRule::pushFilterDownChild(OptContext* octx, + const MatchedResult& child, + const Expression* condition, + Expression** unpickedFilter) { + if (!condition) return nullptr; + + const auto* childPlanNode = DCHECK_NOTNULL(child.node->node()); + const auto& colNames = childPlanNode->colNames(); + + // split the `condition` based on whether the varPropExpr comes from the left child + auto picker = [&colNames](const Expression* e) -> bool { + return graph::ExpressionUtils::checkVarPropIfExist(colNames, e); + }; + + Expression* filterPicked = nullptr; + graph::ExpressionUtils::splitFilter(condition, picker, &filterPicked, unpickedFilter); + if (!filterPicked) return nullptr; + + auto* newChildPlanNode = childPlanNode->clone(); + DCHECK_NE(childPlanNode->outputVar(), newChildPlanNode->outputVar()); + newChildPlanNode->setInputVar(childPlanNode->inputVar()); + newChildPlanNode->setColNames(childPlanNode->colNames()); + auto* newChildGroup = OptGroup::create(octx); + auto* newChildGroupNode = newChildGroup->makeGroupNode(newChildPlanNode); + for (auto* g : child.node->dependencies()) { + newChildGroupNode->dependsOn(g); + } + + auto* newFilterNode = graph::Filter::make(octx->qctx(), nullptr, filterPicked); + newFilterNode->setOutputVar(childPlanNode->outputVar()); + newFilterNode->setColNames(colNames); + newFilterNode->setInputVar(newChildPlanNode->outputVar()); + auto* group = OptGroup::create(octx); + group->makeGroupNode(newFilterNode)->dependsOn(newChildGroup); + return group; +} + +std::string PushFilterDownHashInnerJoinRule::toString() const { + return "PushFilterDownHashInnerJoinRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h new file mode 100644 index 00000000000..2f9de94ce8b --- /dev/null +++ b/src/graph/optimizer/rule/PushFilterDownHashInnerJoinRule.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNHASHINNERJOINRULE_H_ +#define GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNHASHINNERJOINRULE_H_ + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +// Push down the filter items from the left subplan of [[HashInnerJoin]] +class PushFilterDownHashInnerJoinRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *octx, + const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + PushFilterDownHashInnerJoinRule(); + static OptGroup *pushFilterDownChild(OptContext *octx, + const MatchedResult &child, + const Expression *condition, + Expression **unpickedFilter); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // GRAPH_OPTIMIZER_RULE_PUSHFILTERDOWNHASHINNERJOINRULE_H_ diff --git a/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp index 566d5d1be1c..dabf3105a07 100644 --- a/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownInnerJoinRule.cpp @@ -47,27 +47,9 @@ StatusOr PushFilterDownInnerJoinRule::transform( auto symTable = octx->qctx()->symTable(); std::vector leftVarColNames = symTable->getVar(leftVar.first)->colNames; - // split the `condition` based on whether the varPropExpr comes from the left - // child + // split the `condition` based on whether the varPropExpr comes from the left child auto picker = [&leftVarColNames](const Expression* e) -> bool { - auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); - if (varProps.empty()) { - return false; - } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK(expr->kind() == Expression::Kind::kVarProperty); - propNames.emplace_back(static_cast(expr)->prop()); - } - for (auto prop : propNames) { - auto iter = std::find_if(leftVarColNames.begin(), - leftVarColNames.end(), - [&prop](std::string item) { return !item.compare(prop); }); - if (iter == leftVarColNames.end()) { - return false; - } - } - return true; + return graph::ExpressionUtils::checkVarPropIfExist(leftVarColNames, e); }; Expression* filterPicked = nullptr; Expression* filterUnpicked = nullptr; diff --git a/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp b/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp index d6c830be679..73c85c057f9 100644 --- a/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp +++ b/src/graph/optimizer/rule/PushFilterDownLeftJoinRule.cpp @@ -47,27 +47,9 @@ StatusOr PushFilterDownLeftJoinRule::transform( auto symTable = octx->qctx()->symTable(); std::vector leftVarColNames = symTable->getVar(leftVar.first)->colNames; - // split the `condition` based on whether the varPropExpr comes from the left - // child + // split the `condition` based on whether the varPropExpr comes from the left child auto picker = [&leftVarColNames](const Expression* e) -> bool { - auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); - if (varProps.empty()) { - return false; - } - std::vector propNames; - for (auto* expr : varProps) { - DCHECK(expr->kind() == Expression::Kind::kVarProperty); - propNames.emplace_back(static_cast(expr)->prop()); - } - for (auto prop : propNames) { - auto iter = std::find_if(leftVarColNames.begin(), - leftVarColNames.end(), - [&prop](std::string item) { return !item.compare(prop); }); - if (iter == leftVarColNames.end()) { - return false; - } - } - return true; + return graph::ExpressionUtils::checkVarPropIfExist(leftVarColNames, e); }; Expression* filterPicked = nullptr; Expression* filterUnpicked = nullptr; diff --git a/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp b/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp index ce3b88b2b81..19636b6ad27 100644 --- a/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp +++ b/src/graph/optimizer/rule/RemoveNoopProjectRule.cpp @@ -16,7 +16,8 @@ using nebula::graph::QueryContext; namespace nebula { namespace opt { -/*static*/ const std::initializer_list RemoveNoopProjectRule::kQueries{ +/*static*/ +const std::unordered_set RemoveNoopProjectRule::kQueries{ PlanNode::Kind::kGetNeighbors, PlanNode::Kind::kGetVertices, PlanNode::Kind::kGetEdges, @@ -61,7 +62,8 @@ namespace opt { PlanNode::Kind::kHashInnerJoin, PlanNode::Kind::kCrossJoin, PlanNode::Kind::kRollUpApply, - PlanNode::Kind::kArgument}; + PlanNode::Kind::kArgument, +}; std::unique_ptr RemoveNoopProjectRule::kInstance = std::unique_ptr(new RemoveNoopProjectRule()); @@ -72,21 +74,24 @@ RemoveNoopProjectRule::RemoveNoopProjectRule() { const Pattern& RemoveNoopProjectRule::pattern() const { static Pattern pattern = - Pattern::create(graph::PlanNode::Kind::kProject, {Pattern::create(kQueries)}); + Pattern::create(graph::PlanNode::Kind::kProject, {Pattern::create(PlanNode::Kind::kUnknown)}); return pattern; } StatusOr RemoveNoopProjectRule::transform( OptContext* octx, const MatchedResult& matched) const { - const auto* projGroupNode = matched.node; - const auto* oldProjNode = projGroupNode->node(); - const auto* projGroup = projGroupNode->group(); + const auto* oldProjNode = matched.planNode({0}); DCHECK_EQ(oldProjNode->kind(), PlanNode::Kind::kProject); - const auto* depGroupNode = matched.dependencies.front().node; - auto* newNode = depGroupNode->node()->clone(); + auto* projGroup = const_cast(matched.node->group()); + + const auto* depPlanNode = matched.planNode({0, 0}); + auto* newNode = depPlanNode->clone(); newNode->setOutputVar(oldProjNode->outputVar()); + newNode->setColNames(oldProjNode->colNames()); + newNode->setInputVar(depPlanNode->inputVar()); auto* newGroupNode = OptGroupNode::create(octx, newNode, projGroup); - newGroupNode->setDeps(depGroupNode->dependencies()); + newGroupNode->setDeps(matched.result({0, 0}).node->dependencies()); + TransformResult result; result.eraseAll = true; result.newGroupNodes.emplace_back(newGroupNode); @@ -103,27 +108,29 @@ bool RemoveNoopProjectRule::match(OptContext* octx, const MatchedResult& matched DCHECK_EQ(projGroupNode->node()->kind(), PlanNode::Kind::kProject); auto* projNode = static_cast(projGroupNode->node()); - std::vector cols = projNode->columns()->columns(); - for (auto* col : cols) { - if (col->expr()->kind() != Expression::Kind::kVarProperty && - col->expr()->kind() != Expression::Kind::kInputProperty) { - return false; - } + + const auto* depNode = matched.planNode({0, 0}); + if (kQueries.find(depNode->kind()) == kQueries.end()) { + return false; } - const auto* depGroupNode = matched.dependencies.front().node; - const auto* depNode = depGroupNode->node(); + const auto& depColNames = depNode->colNames(); const auto& projColNames = projNode->colNames(); - auto colsNum = depColNames.size(); - if (colsNum != projColNames.size()) { + auto numCols = depColNames.size(); + if (numCols != projColNames.size()) { return false; } - for (size_t i = 0; i < colsNum; ++i) { - if (depColNames[i].compare(projColNames[i])) { + + std::vector cols = projNode->columns()->columns(); + for (size_t i = 0; i < numCols; ++i) { + const auto* expr = DCHECK_NOTNULL(cols[i]->expr()); + if (expr->kind() != Expression::Kind::kVarProperty && + expr->kind() != Expression::Kind::kInputProperty) { return false; } + const auto* propExpr = static_cast(cols[i]->expr()); - if (propExpr->prop() != projColNames[i]) { + if (propExpr->prop() != projColNames[i] || depColNames[i].compare(projColNames[i])) { return false; } } diff --git a/src/graph/optimizer/rule/RemoveNoopProjectRule.h b/src/graph/optimizer/rule/RemoveNoopProjectRule.h index 6a66a778aa2..0b38d4d8235 100644 --- a/src/graph/optimizer/rule/RemoveNoopProjectRule.h +++ b/src/graph/optimizer/rule/RemoveNoopProjectRule.h @@ -6,7 +6,7 @@ #ifndef GRAPH_OPTIMIZER_RULE_REMOVENOOPPROJECTRULE_H_ #define GRAPH_OPTIMIZER_RULE_REMOVENOOPPROJECTRULE_H_ -#include +#include #include "graph/optimizer/OptRule.h" @@ -43,7 +43,7 @@ class RemoveNoopProjectRule final : public OptRule { private: RemoveNoopProjectRule(); - static const std::initializer_list kQueries; + static const std::unordered_set kQueries; static std::unique_ptr kInstance; }; diff --git a/src/graph/planner/match/ArgumentFinder.cpp b/src/graph/planner/match/ArgumentFinder.cpp index d4d5f2f5720..06ee0c58432 100644 --- a/src/graph/planner/match/ArgumentFinder.cpp +++ b/src/graph/planner/match/ArgumentFinder.cpp @@ -24,9 +24,6 @@ StatusOr ArgumentFinder::transformNode(NodeContext* nodeCtx) { auto alias = nodeCtx->info->alias; auto argNode = Argument::make(nodeCtx->qctx, alias); argNode->setColNames({alias}); - auto aliasGeneratedBy = nodeCtx->qctx->symTable()->getAliasGeneratedBy(alias); - NG_RETURN_IF_ERROR(aliasGeneratedBy); - argNode->setInputVar(aliasGeneratedBy.value()); subplan.root = argNode; subplan.tail = argNode; diff --git a/src/graph/planner/match/MatchPlanner.cpp b/src/graph/planner/match/MatchPlanner.cpp index 1e68fa9beaf..353ea04ab0c 100644 --- a/src/graph/planner/match/MatchPlanner.cpp +++ b/src/graph/planner/match/MatchPlanner.cpp @@ -88,11 +88,6 @@ Status MatchPlanner::connectMatchPlan(SubPlan& queryPlan, MatchClauseContext* ma } } if (!interAliases.empty()) { - if (matchPlan.tail->kind() == PlanNode::Kind::kArgument) { - // The input of the argument operator is always the output of the plan on the other side of - // the join - matchPlan.tail->setInputVar(queryPlan.root->outputVar()); - } if (matchCtx->isOptional) { // connect LeftJoin match filter auto& whereCtx = matchCtx->where; @@ -167,7 +162,9 @@ Status MatchPlanner::genQueryPartPlan(QueryContext* qctx, // TBD: need generate var for all queryPlan.tail? if (queryPlan.tail->isSingleInput()) { - queryPlan.tail->setInputVar(qctx->vctx()->anonVarGen()->getVar()); + if (queryPlan.tail->kind() != PlanNode::Kind::kArgument) { + queryPlan.tail->setInputVar(qctx->vctx()->anonVarGen()->getVar()); + } if (!tailConnected_) { tailConnected_ = true; queryPlan.appendStartNode(qctx); diff --git a/src/graph/planner/plan/PlanNode.cpp b/src/graph/planner/plan/PlanNode.cpp index 4d386865d1d..7221f84eef9 100644 --- a/src/graph/planner/plan/PlanNode.cpp +++ b/src/graph/planner/plan/PlanNode.cpp @@ -11,7 +11,6 @@ #include #include -#include "PlanNode.h" #include "common/graph/Response.h" #include "graph/context/QueryContext.h" #include "graph/planner/plan/PlanNodeVisitor.h" @@ -355,6 +354,17 @@ void PlanNode::setInputVar(const std::string& varname, size_t idx) { } } +bool PlanNode::isColumnsIncludedIn(const PlanNode* other) const { + const auto& otherColNames = other->colNames(); + for (auto& colName : colNames()) { + auto iter = std::find(otherColNames.begin(), otherColNames.end(), colName); + if (iter == otherColNames.end()) { + return false; + } + } + return true; +} + std::unique_ptr PlanNode::explain() const { auto desc = std::make_unique(); desc->id = id_; @@ -464,7 +474,6 @@ std::unique_ptr VariableDependencyNode::explain() const { } void PlanNode::setColNames(std::vector cols) { - qctx_->symTable()->setAliasGeneratedBy(cols, outputVarPtr()->name); outputVarPtr()->colNames = std::move(cols); } } // namespace graph diff --git a/src/graph/planner/plan/PlanNode.h b/src/graph/planner/plan/PlanNode.h index f714038520d..678acc5b271 100644 --- a/src/graph/planner/plan/PlanNode.h +++ b/src/graph/planner/plan/PlanNode.h @@ -304,6 +304,8 @@ class PlanNode { return static_cast(this); } + bool isColumnsIncludedIn(const PlanNode* other) const; + protected: PlanNode(QueryContext* qctx, Kind kind); diff --git a/src/graph/planner/plan/Query.cpp b/src/graph/planner/plan/Query.cpp index 6145559f02a..2383987b78a 100644 --- a/src/graph/planner/plan/Query.cpp +++ b/src/graph/planner/plan/Query.cpp @@ -866,14 +866,15 @@ HashJoin::HashJoin(QueryContext* qctx, : BinaryInputNode(qctx, kind, left, right), hashKeys_(std::move(hashKeys)), probeKeys_(std::move(probeKeys)) { - auto lColNames = left->colNames(); - auto& rColNames = right->colNames(); - for (auto& rColName : rColNames) { - if (std::find(lColNames.begin(), lColNames.end(), rColName) == lColNames.end()) { - lColNames.emplace_back(rColName); + if (left && right) { + auto lColNames = left->colNames(); + for (auto& rColName : right->colNames()) { + if (std::find(lColNames.begin(), lColNames.end(), rColName) == lColNames.end()) { + lColNames.emplace_back(rColName); + } } + setColNames(lColNames); } - setColNames(lColNames); } std::unique_ptr HashLeftJoin::explain() const { @@ -883,9 +884,7 @@ std::unique_ptr HashLeftJoin::explain() const { } PlanNode* HashLeftJoin::clone() const { - auto* lnode = left() ? left()->clone() : nullptr; - auto* rnode = right() ? right()->clone() : nullptr; - auto* newLeftJoin = HashLeftJoin::make(qctx_, lnode, rnode); + auto* newLeftJoin = HashLeftJoin::make(qctx_, nullptr, nullptr); newLeftJoin->cloneMembers(*this); return newLeftJoin; } @@ -901,9 +900,7 @@ std::unique_ptr HashInnerJoin::explain() const { } PlanNode* HashInnerJoin::clone() const { - auto* lnode = left() ? left()->clone() : nullptr; - auto* rnode = right() ? right()->clone() : nullptr; - auto* newInnerJoin = HashInnerJoin::make(qctx_, lnode, rnode); + auto* newInnerJoin = HashInnerJoin::make(qctx_, nullptr, nullptr); newInnerJoin->cloneMembers(*this); return newInnerJoin; } @@ -942,9 +939,7 @@ void RollUpApply::cloneMembers(const RollUpApply& r) { } PlanNode* RollUpApply::clone() const { - auto* lnode = left() ? left()->clone() : nullptr; - auto* rnode = right() ? right()->clone() : nullptr; - auto* newRollUpApply = RollUpApply::make(qctx_, lnode, rnode, {}, nullptr); + auto* newRollUpApply = RollUpApply::make(qctx_, nullptr, nullptr, {}, nullptr); newRollUpApply->cloneMembers(*this); return newRollUpApply; } diff --git a/src/graph/service/QueryInstance.cpp b/src/graph/service/QueryInstance.cpp index 4425b86e3d6..2d5107d9bbb 100644 --- a/src/graph/service/QueryInstance.cpp +++ b/src/graph/service/QueryInstance.cpp @@ -90,8 +90,14 @@ Status QueryInstance::validateAndOptimize() { // Validate the query, if failed, return NG_RETURN_IF_ERROR(Validator::validate(sentence_.get(), qctx())); - // Optimize the query, and get the execution plan - NG_RETURN_IF_ERROR(findBestPlan()); + // Optimize the query, and get the execution plan. We should not pass the optimizer errors to user + // since the message is often not easy to understand. Logging them is enough. + if (auto status = findBestPlan(); !status.ok()) { + LOG(ERROR) << "Error found in optimization stage: " << status.message(); + return Status::Error( + "There are some errors found in optimizer, " + "please contact to the admin to learn more details"); + } stats::StatsManager::addValue(kOptimizerLatencyUs, *(qctx_->plan()->optimizeTimeInUs())); if (FLAGS_enable_space_level_metrics && spaceName != "") { stats::StatsManager::addValue( diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 888321b0e0e..2975681b372 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -828,6 +828,24 @@ Expression *ExpressionUtils::flattenInnerLogicalExpr(const Expression *expr) { return allFlattenExpr; } +bool ExpressionUtils::checkVarPropIfExist(const std::vector &columns, + const Expression *e) { + auto varProps = graph::ExpressionUtils::collectAll(e, {Expression::Kind::kVarProperty}); + if (varProps.empty()) { + return false; + } + for (const auto *expr : varProps) { + DCHECK_EQ(expr->kind(), Expression::Kind::kVarProperty); + auto iter = std::find_if(columns.begin(), columns.end(), [expr](const std::string &item) { + return !item.compare(static_cast(expr)->prop()); + }); + if (iter == columns.end()) { + return false; + } + } + return true; +} + // pick the subparts of expression that meet picker's criteria void ExpressionUtils::splitFilter(const Expression *expr, std::function picker, diff --git a/src/graph/util/ExpressionUtils.h b/src/graph/util/ExpressionUtils.h index f77f53f9032..5a69cef1998 100644 --- a/src/graph/util/ExpressionUtils.h +++ b/src/graph/util/ExpressionUtils.h @@ -32,7 +32,7 @@ class ExpressionUtils { // Checks if the kind of the given expression is one of the expected kind static inline bool isKindOf(const Expression* expr, const std::unordered_set& expected) { - return expected.find(expr->kind()) != expected.end(); + return expected.find(DCHECK_NOTNULL(expr)->kind()) != expected.end(); } // Checks if the expression is a property expression (TagProperty or LabelTagProperty or @@ -182,9 +182,12 @@ class ExpressionUtils { // calls flattenInnerLogicalAndExpr() first then executes flattenInnerLogicalOrExpr() static Expression* flattenInnerLogicalExpr(const Expression* expr); + // Check whether there exists the property of variable expression in `columns' + static bool checkVarPropIfExist(const std::vector& columns, const Expression* e); + // Uses the picker to split the given experssion expr into two parts: filterPicked and // filterUnpicked If expr is a non-LogicalAnd expression, applies the picker to expr directly If - // expr is a logicalAnd expression, applies the picker to all its operands + // expr is a logicalAnd expression, applies the picker to all its operands static void splitFilter(const Expression* expr, std::function picker, Expression** filterPicked, diff --git a/tests/tck/features/match/AllShortestPaths.feature b/tests/tck/features/match/AllShortestPaths.feature index 71ea1225b07..7a1134f4263 100644 --- a/tests/tck/features/match/AllShortestPaths.feature +++ b/tests/tck/features/match/AllShortestPaths.feature @@ -475,3 +475,65 @@ Feature: allShortestPaths RETURN p """ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. + + Scenario: allShortestPaths for argument issue + When profiling query: + """ + MATCH (a:player{name:'Tim Duncan'}), (b:player{name:'Tony Parker'}) + WITH a AS b, b AS a + MATCH allShortestPaths((a)-[:like*1..3]-(b)) + RETURN a, b + """ + Then the result should be, in any order, with relax comparison: + | a | b | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 19 | Project | 18 | | + | 18 | HashInnerJoin | 10,17 | | + | 10 | Project | 9 | | + | 9 | CrossJoin | 24,25 | | + | 24 | AppendVertices | 20 | | + | 20 | IndexScan | 2 | | + | 2 | Start | | | + | 25 | AppendVertices | 21 | | + | 21 | IndexScan | 6 | | + | 6 | Start | | | + | 17 | Project | 16 | | + | 16 | ShortestPath | 15 | | + | 15 | CrossJoin | 12,14 | | + | 12 | Project | 11 | | + | 11 | Argument | | | + | 14 | Project | 13 | | + | 13 | Argument | | | + When profiling query: + """ + MATCH (a:player{name:'Tim Duncan'}), (b:player{name:'Tony Parker'}) + WITH a AS b, b AS a + OPTIONAL MATCH allShortestPaths((a)-[:like*1..3]-(b)) + RETURN a, b + """ + Then the result should be, in any order, with relax comparison: + | a | b | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{name: "Tim Duncan"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 19 | Project | 18 | | + | 18 | HashLeftJoin | 10,17 | | + | 10 | Project | 9 | | + | 9 | CrossJoin | 24,25 | | + | 24 | AppendVertices | 20 | | + | 20 | IndexScan | 2 | | + | 2 | Start | | | + | 25 | AppendVertices | 21 | | + | 21 | IndexScan | 6 | | + | 6 | Start | | | + | 17 | Project | 16 | | + | 16 | ShortestPath | 15 | | + | 15 | CrossJoin | 12,14 | | + | 12 | Project | 11 | | + | 11 | Argument | | | + | 14 | Project | 13 | | + | 13 | Argument | | | diff --git a/tests/tck/features/match/MultiLineMultiQueryParts.feature b/tests/tck/features/match/MultiLineMultiQueryParts.feature index fafc5ae4d87..0049557407e 100644 --- a/tests/tck/features/match/MultiLineMultiQueryParts.feature +++ b/tests/tck/features/match/MultiLineMultiQueryParts.feature @@ -9,11 +9,16 @@ Feature: Multi Line Multi Query Parts Scenario: Multi Line Multi Path Patterns When executing query: """ - USE nba; - MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(m)=="Tim Duncan" - RETURN m.player.name AS n1, n.player.name AS n2, - CASE WHEN l.team.name is not null THEN l.team.name - WHEN l.player.name is not null THEN l.player.name ELSE "null" END AS n3 ORDER BY n1, n2, n3 LIMIT 10 + MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(m)=='Tim Duncan' + RETURN + m.player.name AS n1, + n.player.name AS n2, + CASE + WHEN l.team.name is not null THEN l.team.name + WHEN l.player.name is not null THEN l.player.name ELSE 'null' + END AS n3 + ORDER BY n1, n2, n3 + LIMIT 10 """ Then the result should be, in order: | n1 | n2 | n3 | @@ -29,8 +34,10 @@ Feature: Multi Line Multi Query Parts | "Tim Duncan" | "Boris Diaw" | "Tim Duncan" | When executing query: """ - MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(n)=="Tim Duncan" - RETURN m.player.name AS n1, n.player.name AS n2, l.player.name AS n3 ORDER BY n1, n2, n3 LIMIT 10 + MATCH (m)-[]-(n), (n)-[]-(l) WHERE id(n)=='Tim Duncan' + RETURN m.player.name AS n1, n.player.name AS n2, l.player.name AS n3 + ORDER BY n1, n2, n3 + LIMIT 10 """ Then the result should be, in order: | n1 | n2 | n3 | diff --git a/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature new file mode 100644 index 00000000000..43c8c956c38 --- /dev/null +++ b/tests/tck/features/optimizer/PushFilterDownHashInnerJoinRule.feature @@ -0,0 +1,205 @@ +# Copyright (c) 2022 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: Push Filter down HashInnerJoin rule + + Background: + Given a graph with space named "nba" + + Scenario: push filter down HashInnerJoin + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | HashInnerJoin | 6,22 | | + | 6 | Project | 20 | | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 22 | Project | 21 | | + | 21 | Filter | 10 | { "condition": "($-.e[0].likeness>0)" } | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE v1.player.age > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | HashInnerJoin | 6,11 | | + | 6 | Project | 16 | | + | 16 | Filter | 20 | { "condition": "(v.player.age>0)" } | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 11 | Project | 10 | | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 0 AND v1.player.age > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 20 | | + | 20 | HashInnerJoin | 23,25 | | + | 23 | Project | 22 | | + | 22 | Filter | 21 | {"condition": "(v.player.age>0)"} | + | 21 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 25 | Project | 24 | | + | 24 | Filter | 10 | {"condition": "($-.e[0].likeness>0)"} | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + When profiling query: + """ + MATCH (v:player) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1 + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 0 OR v1.player.age > 0 + RETURN e, v2 + ORDER BY e, v2 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Aron Baynes"->"Tim Duncan" @0 {likeness: 80}] | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | [:like "Boris Diaw"->"Tim Duncan" @0 {likeness: 80}] | ("Boris Diaw" :player{age: 36, name: "Boris Diaw"}) | + | [:like "Danny Green"->"Tim Duncan" @0 {likeness: 70}] | ("Danny Green" :player{age: 31, name: "Danny Green"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "LaMarcus Aldridge"->"Tim Duncan" @0 {likeness: 75}] | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | [:like "Marco Belinelli"->"Tim Duncan" @0 {likeness: 55}] | ("Marco Belinelli" :player{age: 32, name: "Marco Belinelli"}) | + | [:like "Shaquille O'Neal"->"Tim Duncan" @0 {likeness: 80}] | ("Shaquille O'Neal" :player{age: 47, name: "Shaquille O'Neal"}) | + | [:like "Tiago Splitter"->"Tim Duncan" @0 {likeness: 80}] | ("Tiago Splitter" :player{age: 34, name: "Tiago Splitter"}) | + | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 30 | Sort | 14 | | + | 14 | Project | 19 | | + | 19 | HashInnerJoin | 6,22 | | + | 6 | Project | 20 | | + | 20 | AppendVertices | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 22 | Project | 21 | | + | 21 | Filter | 10 | { "condition": "(($-.e[0].likeness>0) OR (-.v1.player.age>0))" } | + | 10 | AppendVertices | 9 | | + | 9 | Traverse | 7 | | + | 7 | Argument | 8 | | + | 8 | Start | | | + + Scenario: NOT push filter down HashInnerJoin + When profiling query: + """ + MATCH (v:player)-[]-(vv) + WHERE id(v) == 'Tim Duncan' + WITH v AS v1, vv AS vv + MATCH (v1)-[e:like]-(v2) + WHERE e.likeness > 90 OR vv.team.start_year>2000 + RETURN e, v2 + ORDER BY e, v2 + LIMIT 3 + """ + Then the result should be, in any order: + | e | v2 | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | [:like "Dejounte Murray"->"Tim Duncan" @0 {likeness: 99}] | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 20 | TopN | 15 | | + | 15 | Project | 14 | | + | 14 | Filter | 13 | { "condition": "(($e.likeness>90) OR (vv.team.start_year>2000))" } | + | 13 | HashInnerJoin | 16,12 | | + | 16 | Project | 5 | | + | 5 | AppendVertices | 17 | | + | 17 | Traverse | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 12 | Project | 11 | | + | 11 | AppendVertices | 10 | | + | 10 | Traverse | 8 | | + | 8 | Argument | 9 | | + | 9 | Start | | |