From 63c7e26caef5f7aab5756283725141c36f059dc4 Mon Sep 17 00:00:00 2001
From: Sophie <84560950+Sophie-Xie@users.noreply.github.com>
Date: Mon, 15 May 2023 18:22:32 +0800
Subject: [PATCH] Cherry pick from nebula: go performance (#2769)

* change batch size (#5550)

* change batch size

* add flag to config

* Fix simple case (#5551)

* m2n simple case

* add test case

---------

Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com>

---------

Co-authored-by: jimingquan
---
 conf/nebula-graphd.conf.default              |  2 +
 conf/nebula-graphd.conf.production           |  2 +
 src/graph/executor/algo/AllPathsExecutor.cpp |  2 +-
 src/graph/planner/ngql/GoPlanner.cpp         | 19 ++--
 src/graph/validator/GoValidator.cpp          |  3 +-
 tests/tck/features/go/SimpleCase.feature     | 91 +++++++++++---------
 6 files changed, 69 insertions(+), 50 deletions(-)

diff --git a/conf/nebula-graphd.conf.default b/conf/nebula-graphd.conf.default
index 974d9b3286a..189408c7030 100644
--- a/conf/nebula-graphd.conf.default
+++ b/conf/nebula-graphd.conf.default
@@ -180,3 +180,5 @@
 --min_batch_size=8192
 # if true, return directly without go through RPC
 --optimize_appendvertices=false
+# number of paths constructed by each thread
+--path_batch_size=10000
diff --git a/conf/nebula-graphd.conf.production b/conf/nebula-graphd.conf.production
index 539c8792c3d..320cc1de1de 100644
--- a/conf/nebula-graphd.conf.production
+++ b/conf/nebula-graphd.conf.production
@@ -179,3 +179,5 @@
 --min_batch_size=8192
 # if true, return directly without go through RPC
 --optimize_appendvertices=false
+# number of paths constructed by each thread
+--path_batch_size=10000
diff --git a/src/graph/executor/algo/AllPathsExecutor.cpp b/src/graph/executor/algo/AllPathsExecutor.cpp
index f70c07ecc93..27d5eef4fc6 100644
--- a/src/graph/executor/algo/AllPathsExecutor.cpp
+++ b/src/graph/executor/algo/AllPathsExecutor.cpp
@@ -12,7 +12,7 @@ DEFINE_uint32(
     100,
     "the number of vids to expand, when this threshold is exceeded, use heuristic expansion");
 DEFINE_uint32(path_threshold_ratio, 2, "threshold for heuristics expansion");
-DEFINE_uint32(path_batch_size, 50000, "number of paths constructed by each thread");
+DEFINE_uint32(path_batch_size, 10000, "number of paths constructed by each thread");

 namespace nebula {
 namespace graph {
diff --git a/src/graph/planner/ngql/GoPlanner.cpp b/src/graph/planner/ngql/GoPlanner.cpp
index 1fc04cc9973..c07ff06cc3f 100644
--- a/src/graph/planner/ngql/GoPlanner.cpp
+++ b/src/graph/planner/ngql/GoPlanner.cpp
@@ -311,12 +311,19 @@ StatusOr<SubPlan> GoPlanner::transform(AstContext* astCtx) {
     for (auto node : varPtr->writtenBy) {
       preRootNode_ = node;
     }
-
-    auto argNode = Argument::make(qctx, from.runtimeVidName);
-    argNode->setColNames({from.runtimeVidName});
-    argNode->setInputVertexRequired(false);
-    goCtx_->vidsVar = argNode->outputVar();
-    startNode_ = argNode;
+    if (goCtx_->isSimple) {
+      // go from 'xxx' over edge yield distinct edge._dst as id |
+      // go from $-.id over edge yield distinct edge._dst
+      // above scenario, the second statement does not need the argument operator
+      startNode_ = preRootNode_;
+      goCtx_->vidsVar = varName;
+    } else {
+      auto argNode = Argument::make(qctx, from.runtimeVidName);
+      argNode->setColNames({from.runtimeVidName});
+      argNode->setInputVertexRequired(false);
+      goCtx_->vidsVar = argNode->outputVar();
+      startNode_ = argNode;
+    }
   }
   auto& steps = goCtx_->steps;
   if (!steps.isMToN() && steps.steps() == 0) {
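For context on the batch-size change above: the patch lowers the compiled-in default of `path_batch_size` from 50000 to 10000 and exposes the same value in both graphd conf files. Below is a minimal sketch, assuming the gflags-style flag macros used in this codebase, of how such a flag is declared and read; only the flag name, default value, and help string come from the diff, while `numBatches()` and the `main()` harness are hypothetical illustration, not part of the patch.

```cpp
// Minimal sketch (not part of the patch): declaring and consuming a gflags flag.
// Only the flag name, default value, and help text mirror the diff above;
// numBatches() and main() are illustrative stand-ins.
#include <gflags/gflags.h>

#include <cstddef>
#include <cstdio>

DEFINE_uint32(path_batch_size, 10000, "number of paths constructed by each thread");

// How many per-thread batches are needed to build `totalPaths` paths.
static std::size_t numBatches(std::size_t totalPaths) {
  const std::size_t batch = FLAGS_path_batch_size;
  return (totalPaths + batch - 1) / batch;
}

int main(int argc, char* argv[]) {
  // A conf entry such as `--path_batch_size=10000` is parsed like a
  // command-line flag and overrides the default baked into the binary.
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  std::printf("path_batch_size=%u, batches for 1M paths=%zu\n",
              static_cast<unsigned>(FLAGS_path_batch_size), numBatches(1000000));
  return 0;
}
```

The same value can also be supplied on the graphd command line; the conf entries added by this patch simply make the setting visible and tunable without rebuilding.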
diff --git a/src/graph/validator/GoValidator.cpp b/src/graph/validator/GoValidator.cpp
index cf65da70e15..d589a37596a 100644
--- a/src/graph/validator/GoValidator.cpp
+++ b/src/graph/validator/GoValidator.cpp
@@ -292,8 +292,7 @@ bool GoValidator::checkDstPropOrVertexExist(const Expression* expr) {
 }

 bool GoValidator::isSimpleCase() {
-  if (!goCtx_->limits.empty() || !goCtx_->distinct || goCtx_->filter ||
-      goCtx_->from.fromType != FromType::kInstantExpr) {
+  if (!goCtx_->limits.empty() || !goCtx_->distinct || goCtx_->filter != nullptr) {
     return false;
   }
   // Check if the yield cluase uses:
diff --git a/tests/tck/features/go/SimpleCase.feature b/tests/tck/features/go/SimpleCase.feature
index addcb1107ba..1e4246fb97d 100644
--- a/tests/tck/features/go/SimpleCase.feature
+++ b/tests/tck/features/go/SimpleCase.feature
@@ -366,23 +366,20 @@ Feature: Simple case
       | count(*) |
       | 28 |
     And the execution plan should be:
-      | id | name | dependencies | operator info |
-      | 17 | Aggregate | 15 | |
-      | 15 | Minus | 13,14 | |
-      | 13 | Project | 16 | |
-      | 16 | PassThrough | 12 | |
-      | 12 | Dedup | 11 | |
-      | 11 | Project | 10 | |
-      | 10 | HashInnerJoin | 5,9 | |
-      | 5 | Project | 4 | |
-      | 4 | Dedup | 3 | |
-      | 3 | ExpandAll | 2 | |
-      | 2 | Expand | 0 | |
-      | 0 | Start | | |
-      | 9 | ExpandAll | 8 | |
-      | 8 | Expand | 7 | |
-      | 7 | Argument | | |
-      | 14 | Project | 16 | |
+      | id | name | dependencies | operator info |
+      | 14 | Aggregate | 12 | |
+      | 12 | Minus | 10,11 | |
+      | 10 | Project | 13 | |
+      | 13 | PassThrough | 9 | |
+      | 9 | Project | 8 | |
+      | 8 | Dedup | 7 | |
+      | 7 | Expand | 5 | |
+      | 5 | Project | 4 | |
+      | 4 | Dedup | 3 | |
+      | 3 | ExpandAll | 2 | |
+      | 2 | Expand | 0 | |
+      | 0 | Start | | |
+      | 11 | Project | 13 | |

   Scenario: other simple case
     When profiling query:
@@ -393,18 +390,15 @@ Feature: Simple case
       | count(*) |
       | 0 |
     And the execution plan should be:
-      | id | name | dependencies | operator info |
-      | 12 | Aggregate | 11 | |
-      | 11 | Dedup | 10 | |
-      | 10 | Project | 9 | |
-      | 9 | HashInnerJoin | 4,8 | |
-      | 4 | Project | 3 | |
-      | 3 | Dedup | 2 | |
-      | 2 | Expand | 0 | |
-      | 0 | Start | | |
-      | 8 | ExpandAll | 7 | |
-      | 7 | Expand | 6 | |
-      | 6 | Argument | | |
+      | id | name | dependencies | operator info |
+      | 9 | Aggregate | 8 | |
+      | 8 | Project | 7 | |
+      | 7 | Dedup | 6 | |
+      | 6 | Expand | 4 | |
+      | 4 | Project | 3 | |
+      | 3 | Dedup | 2 | |
+      | 2 | Expand | 0 | |
+      | 0 | Start | | |
     When profiling query:
       """
       GO 1 STEP FROM "Tony Parker" OVER * YIELD distinct id($$) as id| GO 3 STEP FROM $-.id OVER * YIELD distinct id($$) | YIELD COUNT(*)
@@ -413,18 +407,33 @@ Feature: Simple case
       | COUNT(*) |
       | 22 |
     And the execution plan should be:
-      | id | name | dependencies | operator info |
-      | 12 | Aggregate | 11 | |
-      | 11 | Dedup | 10 | |
-      | 10 | Project | 9 | |
-      | 9 | HashInnerJoin | 4,8 | |
-      | 4 | Project | 3 | |
-      | 3 | Dedup | 2 | |
-      | 2 | Expand | 0 | |
-      | 0 | Start | | |
-      | 8 | ExpandAll | 7 | |
-      | 7 | Expand | 6 | |
-      | 6 | Argument | | |
+      | id | name | dependencies | operator info |
+      | 9 | Aggregate | 8 | |
+      | 8 | Project | 7 | |
+      | 7 | Dedup | 6 | |
+      | 6 | Expand | 4 | |
+      | 4 | Project | 3 | |
+      | 3 | Dedup | 2 | |
+      | 2 | Expand | 0 | |
+      | 0 | Start | | |
+    When profiling query:
+      """
+      GO 1 STEP FROM "Tony Parker" OVER * YIELD distinct id($$) as id| GO 2 to 4 STEP FROM $-.id OVER * YIELD distinct id($$) | YIELD COUNT(*)
+      """
+    Then the result should be, in any order, with relax comparison:
+      | COUNT(*) |
+      | 26 |
+    And the execution plan should be:
+      | id | name | dependencies | operator info |
+      | 10 | Aggregate | 9 | |
+      | 9 | Project | 8 | |
+      | 8 | Dedup | 7 | |
+      | 7 | ExpandAll | 6 | |
+      | 6 | Expand | 4 | |
+      | 4 | Project | 3 | |
+      | 3 | Dedup | 2 | |
+      | 2 | Expand | 0 | |
+      | 0 | Start | | |

   Scenario: could not be optimied cases
     When profiling query:
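To summarize the behavioural change behind the plan differences above: before this patch, GoValidator::isSimpleCase() also required goCtx_->from.fromType == FromType::kInstantExpr, so only a GO starting from literal vids could take the simple-case path; afterwards a GO whose start vids come from a pipe (FROM $-.id) qualifies as well, and GoPlanner::transform() then reuses the previous statement's root node instead of inserting an Argument node, which lines up with the Argument and HashInnerJoin operators disappearing from the profiled plans. The sketch below condenses that check under stated assumptions: GoContextLite is a hypothetical stand-in for the real GoContext (which has more fields, and whose isSimpleCase() goes on to inspect the YIELD clause, as the trailing context line of the GoValidator.cpp hunk hints).

```cpp
// Condensed sketch of the relaxed "simple case" test (not the real class).
#include <cstdint>
#include <vector>

struct Expression;  // opaque stand-in for the WHERE-filter expression

struct GoContextLite {                // hypothetical subset of GoContext
  std::vector<int64_t> limits;        // per-step LIMIT values
  bool distinct{false};               // YIELD DISTINCT requested
  const Expression* filter{nullptr};  // WHERE filter, if any
};

bool isSimpleCaseLite(const GoContextLite& ctx) {
  // The patch drops the extra condition
  //   ctx.from.fromType != FromType::kInstantExpr
  // which previously disqualified piped input such as `GO ... FROM $-.id ...`.
  return ctx.limits.empty() && ctx.distinct && ctx.filter == nullptr;
}
```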