Skip to content

Commit

Permalink
fix duplicate output column names of join (#4965)
Browse files Browse the repository at this point in the history
  • Loading branch information
jievince authored Dec 2, 2022
1 parent 4f1fe76 commit 4731ed9
Show file tree
Hide file tree
Showing 7 changed files with 277 additions and 26 deletions.
12 changes: 0 additions & 12 deletions src/graph/executor/query/InnerJoinExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,6 @@ void InnerJoinExecutor::buildNewRow(const std::unordered_map<T, std::vector<cons
}
}

Row InnerJoinExecutor::newRow(Row left, Row right) const {
Row r;
r.reserve(left.size() + right.size());
r.values.insert(r.values.end(),
std::make_move_iterator(left.values.begin()),
std::make_move_iterator(left.values.end()));
r.values.insert(r.values.end(),
std::make_move_iterator(right.values.begin()),
std::make_move_iterator(right.values.end()));
return r;
}

const std::string& InnerJoinExecutor::leftVar() const {
if (node_->kind() == PlanNode::Kind::kBiInnerJoin) {
return node_->asNode<BiJoin>()->leftInputVar();
Expand Down
3 changes: 0 additions & 3 deletions src/graph/executor/query/InnerJoinExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ class InnerJoinExecutor : public JoinExecutor {
Row rRow,
DataSet& ds) const;

// concat rows
Row newRow(Row left, Row right) const;

const std::string& leftVar() const;

const std::string& rightVar() const;
Expand Down
43 changes: 34 additions & 9 deletions src/graph/executor/query/JoinExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,25 @@ Status JoinExecutor::checkBiInputDataSets() {
return Status::Error(ss.str());
}
colSize_ = join->colNames().size();

auto& lhsResult = ectx_->getResult(join->leftInputVar());
auto& lhsColNames = lhsResult.valuePtr()->getDataSet().colNames;
auto lhsColSize = lhsColNames.size();

auto& rhsResult = ectx_->getResult(join->rightInputVar());
auto& rhsColNames = rhsResult.valuePtr()->getDataSet().colNames;
auto rhsColSize = rhsColNames.size();

DCHECK_LE(colSize_, lhsColSize + rhsColSize);
if (colSize_ < lhsColSize + rhsColSize) {
for (size_t i = 0; i < rhsColSize; ++i) {
auto it = std::find(lhsColNames.begin(), lhsColNames.end(), rhsColNames[i]);
if (it == lhsColNames.end()) {
rhsOutputColIdxs_.push_back(i);
}
}
}

return Status::OK();
}

Expand Down Expand Up @@ -83,15 +102,21 @@ void JoinExecutor::buildSingleKeyHashTable(
}

Row JoinExecutor::newRow(Row left, Row right) const {
Row r;
r.reserve(left.size() + right.size());
r.values.insert(r.values.end(),
std::make_move_iterator(left.values.begin()),
std::make_move_iterator(left.values.end()));
r.values.insert(r.values.end(),
std::make_move_iterator(right.values.begin()),
std::make_move_iterator(right.values.end()));
return r;
Row row;
row.reserve(left.size() + right.size());
row.values.insert(row.values.end(),
std::make_move_iterator(left.values.begin()),
std::make_move_iterator(left.values.end()));
if (!rhsOutputColIdxs_.empty()) {
for (auto idx : rhsOutputColIdxs_) {
row.values.emplace_back(std::move(right.values[idx]));
}
} else {
row.values.insert(row.values.end(),
std::make_move_iterator(right.values.begin()),
std::make_move_iterator(right.values.end()));
}
return row;
}

} // namespace graph
Expand Down
6 changes: 6 additions & 0 deletions src/graph/executor/query/JoinExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ class JoinExecutor : public Executor {

std::unique_ptr<Iterator> lhsIter_;
std::unique_ptr<Iterator> rhsIter_;
// colSize_ is the size of the output columns of join executor.
// If the join is natural join, which means the left and right columns have duplicated names,
// the output columns will be a intersection of the left and right columns.
size_t colSize_{0};
// If the join is natural join, rhsOutputColIdxs_ will be used to record the output column index
// of the right. If not, rhsOutputColIdxs_ will be empty.
std::vector<size_t> rhsOutputColIdxs_;
std::unordered_map<Value, std::vector<const Row*>> hashTable_;
std::unordered_map<List, std::vector<const Row*>> listHashTable_;
};
Expand Down
164 changes: 164 additions & 0 deletions src/graph/executor/test/JoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "graph/executor/query/InnerJoinExecutor.h"
#include "graph/executor/query/LeftJoinExecutor.h"
#include "graph/executor/test/QueryTestBase.h"
#include "graph/planner/plan/Logic.h"
#include "graph/planner/plan/Query.h"

namespace nebula {
Expand Down Expand Up @@ -68,6 +69,43 @@ class JoinTest : public QueryTestBase {
qctx_->symTable()->newVariable("empty_var2");
qctx_->ectx()->setResult("empty_var2", ResultBuilder().value(Value(std::move(ds))).build());
}
{
DataSet ds;
ds.colNames = {"v1", "e1", "v2", "v3"};
Row row1;
row1.values.emplace_back(1);
row1.values.emplace_back(2);
row1.values.emplace_back(3);
row1.values.emplace_back(4);
ds.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(11);
row2.values.emplace_back(12);
row2.values.emplace_back(3);
row2.values.emplace_back(4);
ds.rows.emplace_back(std::move(row2));

qctx_->symTable()->newVariable("var4");
qctx_->ectx()->setResult("var4", ResultBuilder().value(Value(std::move(ds))).build());
}
{
DataSet ds;
ds.colNames = {"v2", "e2", "v3"};
Row row1;
row1.values.emplace_back(3);
row1.values.emplace_back(0);
row1.values.emplace_back(4);
ds.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(3);
row2.values.emplace_back(0);
row2.values.emplace_back(5);
ds.rows.emplace_back(std::move(row2));
qctx_->symTable()->newVariable("var5");
qctx_->ectx()->setResult("var5", ResultBuilder().value(Value(std::move(ds))).build());
}
}

void testInnerJoin(std::string left, std::string right, DataSet& expected, int64_t line);
Expand Down Expand Up @@ -171,6 +209,65 @@ TEST_F(JoinTest, InnerJoin) {
testInnerJoin("var2", "var1", expected, __LINE__);
}

TEST_F(JoinTest, BiInnerJoin) {
DataSet expected;
expected.colNames = {"v1", "e1", "v2", "v3", "e2"};
Row row1;
row1.values.emplace_back(1);
row1.values.emplace_back(2);
row1.values.emplace_back(3);
row1.values.emplace_back(4);
row1.values.emplace_back(0);
expected.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(11);
row2.values.emplace_back(12);
row2.values.emplace_back(3);
row2.values.emplace_back(4);
row2.values.emplace_back(0);
expected.rows.emplace_back(std::move(row2));

// $var4 inner join $var5 on $var4.v2 = $var5.v2, $var4.v3 = $var4.v3
auto key1 = VariablePropertyExpression::make(pool_, "var4", "v2");
auto key2 = VariablePropertyExpression::make(pool_, "var4", "v3");
std::vector<Expression*> hashKeys = {key1, key2};
auto probe1 = VariablePropertyExpression::make(pool_, "var5", "v2");
auto probe2 = VariablePropertyExpression::make(pool_, "var5", "v3");
std::vector<Expression*> probeKeys = {probe1, probe2};

auto lhs = Project::make(qctx_.get(), nullptr, nullptr);
lhs->setOutputVar("var4");
lhs->setColNames({"v1", "e1", "v2", "v3"});
auto rhs = Project::make(qctx_.get(), nullptr, nullptr);
rhs->setOutputVar("var5");
rhs->setColNames({"v2", "e2", "v3"});

auto* join = BiInnerJoin::make(qctx_.get(), lhs, rhs, std::move(hashKeys), std::move(probeKeys));

auto joinExe = std::make_unique<BiInnerJoinExecutor>(join, qctx_.get());
auto future = joinExe->execute();
auto status = std::move(future).get();
EXPECT_TRUE(status.ok());
auto& result = qctx_->ectx()->getResult(join->outputVar());

DataSet resultDs;
resultDs.colNames = {"v1", "e1", "v2", "v3", "e2"};
auto iter = result.iter();
for (; iter->valid(); iter->next()) {
const auto& cols = *iter->row();
Row row;
for (size_t i = 0; i < cols.size(); ++i) {
Value col = cols[i];
row.values.emplace_back(std::move(col));
}
resultDs.rows.emplace_back(std::move(row));
}

EXPECT_EQ(resultDs, expected);
EXPECT_EQ(result.state(), Result::State::kSuccess);
}

TEST_F(JoinTest, InnerJoinTwice) {
std::string joinOutput;
{
Expand Down Expand Up @@ -283,6 +380,73 @@ TEST_F(JoinTest, LeftJoin) {
testLeftJoin("var1", "var2", expected, __LINE__);
}

TEST_F(JoinTest, BiLeftJoin) {
DataSet expected;
expected.colNames = {"v2", "e2", "v3", "v1", "e1"};
Row row1;
row1.values.emplace_back(3);
row1.values.emplace_back(0);
row1.values.emplace_back(4);
row1.values.emplace_back(1);
row1.values.emplace_back(2);
expected.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(3);
row2.values.emplace_back(0);
row2.values.emplace_back(4);
row2.values.emplace_back(11);
row2.values.emplace_back(12);
expected.rows.emplace_back(std::move(row2));

Row row3;
row3.values.emplace_back(3);
row3.values.emplace_back(0);
row3.values.emplace_back(5);
row3.values.emplace_back(Value::kNullValue);
row3.values.emplace_back(Value::kNullValue);
expected.rows.emplace_back(std::move(row3));

// $var5 left join $var4 on $var5.v2 = $var4.v2, $var5.v3 = $var4.v3
auto key1 = VariablePropertyExpression::make(pool_, "var5", "v2");
auto key2 = VariablePropertyExpression::make(pool_, "var5", "v3");
std::vector<Expression*> hashKeys = {key1, key2};
auto probe1 = VariablePropertyExpression::make(pool_, "var4", "v2");
auto probe2 = VariablePropertyExpression::make(pool_, "var4", "v3");
std::vector<Expression*> probeKeys = {probe1, probe2};

auto lhs = Project::make(qctx_.get(), nullptr, nullptr);
lhs->setOutputVar("var5");
lhs->setColNames({"v2", "e2", "v3"});
auto rhs = Project::make(qctx_.get(), nullptr, nullptr);
rhs->setOutputVar("var4");
rhs->setColNames({"v1", "e1", "v2", "v3"});

auto* join = BiLeftJoin::make(qctx_.get(), lhs, rhs, std::move(hashKeys), std::move(probeKeys));

auto joinExe = std::make_unique<BiLeftJoinExecutor>(join, qctx_.get());
auto future = joinExe->execute();
auto status = std::move(future).get();
EXPECT_TRUE(status.ok());
auto& result = qctx_->ectx()->getResult(join->outputVar());

DataSet resultDs;
resultDs.colNames = {"v2", "e2", "v3", "v1", "e1"};
auto iter = result.iter();
for (; iter->valid(); iter->next()) {
const auto& cols = *iter->row();
Row row;
for (size_t i = 0; i < cols.size(); ++i) {
Value col = cols[i];
row.values.emplace_back(std::move(col));
}
resultDs.rows.emplace_back(std::move(row));
}

EXPECT_EQ(resultDs, expected);
EXPECT_EQ(result.state(), Result::State::kSuccess);
}

TEST_F(JoinTest, LeftJoinTwice) {
std::string joinOutput;
{
Expand Down
8 changes: 6 additions & 2 deletions src/graph/planner/plan/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,8 +867,12 @@ BiJoin::BiJoin(QueryContext* qctx,
hashKeys_(std::move(hashKeys)),
probeKeys_(std::move(probeKeys)) {
auto lColNames = left->colNames();
auto rColNames = right->colNames();
lColNames.insert(lColNames.end(), rColNames.begin(), rColNames.end());
auto& rColNames = right->colNames();
for (auto& rColName : rColNames) {
if (std::find(lColNames.begin(), lColNames.end(), rColName) == lColNames.end()) {
lColNames.emplace_back(rColName);
}
}
setColNames(lColNames);
}

Expand Down
67 changes: 67 additions & 0 deletions tests/tck/features/match/MultiQueryParts.feature
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,75 @@ Feature: Multi Query Parts
| o.player.name |
| "Tim Duncan" |
| "Tim Duncan" |
# Both the 2 match statement contains variable v1 and v4
When profiling query:
"""
MATCH (v1:player)-[:like*2..2]->(v2)-[e3:like]->(v4) where id(v1) == "Tony Parker"
MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return *
"""
Then the result should be, in any order, with relax comparison:
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Manu Ginobili") | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan" ) | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan" ) | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] |
# The redudant Project after BiInnerJoin is removed now
And the execution plan should be:
| id | name | dependencies | profiling data | operator info |
| 19 | BiInnerJoin | 7,14 | | |
| 7 | Project | 6 | | |
| 6 | AppendVertices | 5 | | |
| 5 | Traverse | 20 | | |
| 20 | Traverse | 2 | | |
| 2 | Dedup | 1 | | |
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 14 | Project | 13 | | |
| 13 | AppendVertices | 12 | | |
| 12 | Traverse | 21 | | |
| 21 | Traverse | 9 | | |
| 9 | Dedup | 8 | | |
| 8 | PassThrough | 10 | | |
| 10 | Start | | | |

Scenario: Optional Match
When profiling query:
"""
MATCH (v1:player)-[:like*2..2]->(v2)-[e3:like]->(v4) where id(v1) == "Tony Parker"
OPTIONAL MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return *
"""
Then the result should be, in any order, with relax comparison:
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
# The redudant Project after BiLeftJoin is removed now
And the execution plan should be:
| id | name | dependencies | profiling data | operator info |
| 19 | BiLeftJoin | 7,14 | | |
| 7 | Project | 6 | | |
| 6 | AppendVertices | 5 | | |
| 5 | Traverse | 20 | | |
| 20 | Traverse | 2 | | |
| 2 | Dedup | 1 | | |
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 14 | Project | 13 | | |
| 13 | AppendVertices | 12 | | |
| 12 | Traverse | 21 | | |
| 21 | Traverse | 9 | | |
| 9 | Dedup | 8 | | |
| 8 | PassThrough | 10 | | |
| 10 | Start | | | |
When executing query:
"""
MATCH (m)-[]-(n) WHERE id(m)=="Tim Duncan"
Expand Down

0 comments on commit 4731ed9

Please sign in to comment.