From 6eb62fb23fb9485bc5f9dd095fccb71abbacbb20 Mon Sep 17 00:00:00 2001 From: xtcyclist <7731943+xtcyclist@users.noreply.github.com> Date: Wed, 30 Nov 2022 17:53:19 +0800 Subject: [PATCH] Fix order-by's scope of columns. --- src/common/datatypes/DataSet.h | 11 ++ src/common/datatypes/Value.cpp | 5 + src/common/datatypes/Value.h | 1 + src/graph/context/Symbols.h | 1 + src/graph/executor/query/SortExecutor.cpp | 2 + src/graph/executor/query/TopNExecutor.cpp | 8 +- .../planner/match/ReturnClausePlanner.cpp | 7 + src/graph/planner/plan/PlanNode.h | 4 + src/graph/validator/MatchValidator.cpp | 90 +++++++++++- src/graph/validator/MatchValidator.h | 6 + src/parser/Clauses.h | 17 ++- tests/tck/features/match/OrderBy.feature | 139 ++++++++++++++++++ 12 files changed, 278 insertions(+), 13 deletions(-) create mode 100644 tests/tck/features/match/OrderBy.feature diff --git a/src/common/datatypes/DataSet.h b/src/common/datatypes/DataSet.h index b1230001ce8..1f81a3a7af7 100644 --- a/src/common/datatypes/DataSet.h +++ b/src/common/datatypes/DataSet.h @@ -216,6 +216,17 @@ struct DataSet { bool operator==(const DataSet& rhs) const { return colNames == rhs.colNames && rows == rhs.rows; } + + void removeColumn(std::vector& indices) { + for (auto& row : rows) { + for (auto& idx : indices) { + row.values.erase(row.values.begin() + idx); + } + } + for (auto& idx : indices) { + colNames.erase(colNames.begin() + idx); + } + } }; inline std::ostream& operator<<(std::ostream& os, const DataSet& d) { diff --git a/src/common/datatypes/Value.cpp b/src/common/datatypes/Value.cpp index c49adf90290..6632feb64ac 100644 --- a/src/common/datatypes/Value.cpp +++ b/src/common/datatypes/Value.cpp @@ -678,6 +678,11 @@ const DataSet& Value::getDataSet() const { return *(value_.gVal); } +DataSet& Value::getMutableDataSet() { + CHECK_EQ(type_, Type::DATASET); + return *(value_.gVal); +} + const DataSet* Value::getDataSetPtr() const { CHECK_EQ(type_, Type::DATASET); return value_.gVal.get(); diff --git a/src/common/datatypes/Value.h b/src/common/datatypes/Value.h index b55397a1566..329536d4127 100644 --- a/src/common/datatypes/Value.h +++ b/src/common/datatypes/Value.h @@ -309,6 +309,7 @@ struct Value { const Set& getSet() const; const Set* getSetPtr() const; const DataSet& getDataSet() const; + DataSet& getMutableDataSet(); const DataSet* getDataSetPtr() const; const Geography& getGeography() const; const Geography* getGeographyPtr() const; diff --git a/src/graph/context/Symbols.h b/src/graph/context/Symbols.h index fb30d957770..61c59c20c7d 100644 --- a/src/graph/context/Symbols.h +++ b/src/graph/context/Symbols.h @@ -49,6 +49,7 @@ struct Variable { // the count of use the variable std::atomic userCount{0}; + std::vector invisibleColIndicies; }; class SymbolTable final { diff --git a/src/graph/executor/query/SortExecutor.cpp b/src/graph/executor/query/SortExecutor.cpp index ebeba5cd854..9f09dd87672 100644 --- a/src/graph/executor/query/SortExecutor.cpp +++ b/src/graph/executor/query/SortExecutor.cpp @@ -44,6 +44,8 @@ folly::Future SortExecutor::execute() { auto seqIter = static_cast(iter); std::sort(seqIter->begin(), seqIter->end(), comparator); + result.valuePtr()->getMutableDataSet().removeColumn(sort->inputVars()[0]->invisibleColIndicies); + return finish(ResultBuilder().value(result.valuePtr()).iter(std::move(result).iter()).build()); } diff --git a/src/graph/executor/query/TopNExecutor.cpp b/src/graph/executor/query/TopNExecutor.cpp index e685a8ff31b..0f68959dc98 100644 --- a/src/graph/executor/query/TopNExecutor.cpp +++ b/src/graph/executor/query/TopNExecutor.cpp @@ -54,13 +54,15 @@ folly::Future TopNExecutor::execute() { maxCount_ = size - offset_; heapSize_ = size; } + if (heapSize_ == 0) { iter->clear(); - return finish(ResultBuilder().value(result.valuePtr()).iter(std::move(result).iter()).build()); + } else { + executeTopN(iter); + iter->eraseRange(maxCount_, size); } + result.valuePtr()->getMutableDataSet().removeColumn(topn->inputVars()[0]->invisibleColIndicies); - executeTopN(iter); - iter->eraseRange(maxCount_, size); return finish(ResultBuilder().value(result.valuePtr()).iter(std::move(result).iter()).build()); } diff --git a/src/graph/planner/match/ReturnClausePlanner.cpp b/src/graph/planner/match/ReturnClausePlanner.cpp index 87fa55714de..bf8d278dd5c 100644 --- a/src/graph/planner/match/ReturnClausePlanner.cpp +++ b/src/graph/planner/match/ReturnClausePlanner.cpp @@ -38,6 +38,13 @@ Status ReturnClausePlanner::buildReturn(ReturnClauseContext* rctx, SubPlan& subP NG_RETURN_IF_ERROR(orderPlan); auto plan = std::move(orderPlan).value(); subPlan = SegmentsConnector::addInput(plan, subPlan, true); + size_t idx = 0; + for (auto& yiledCol : rctx->yield->yieldColumns->columns()) { + if (!yiledCol->isVisible()) { + subPlan.root->mutableInputVars()[0]->invisibleColIndicies.push_back(idx); + } + idx++; + } } if (rctx->pagination != nullptr && diff --git a/src/graph/planner/plan/PlanNode.h b/src/graph/planner/plan/PlanNode.h index f714038520d..6d79fcd793e 100644 --- a/src/graph/planner/plan/PlanNode.h +++ b/src/graph/planner/plan/PlanNode.h @@ -278,6 +278,10 @@ class PlanNode { return inputVars_; } + std::vector& mutableInputVars() { + return inputVars_; + } + void releaseSymbols(); void updateSymbols(); diff --git a/src/graph/validator/MatchValidator.cpp b/src/graph/validator/MatchValidator.cpp index aca9729dd8b..7b67028f733 100644 --- a/src/graph/validator/MatchValidator.cpp +++ b/src/graph/validator/MatchValidator.cpp @@ -437,7 +437,8 @@ Status MatchValidator::validateReturn(MatchReturn *ret, if (!status.ok()) { return status; } - if (columns->empty() && !ret->returnItems()->columns()) { + if (ret->returnItems()->allNamedAliases() && columns->empty() && + !ret->returnItems()->columns()) { return Status::SemanticError("RETURN * is not allowed when there are no variables in scope"); } } @@ -462,6 +463,13 @@ Status MatchValidator::validateReturn(MatchReturn *ret, } } DCHECK(!columns->empty()); + if (ret->orderFactors() != nullptr) { + auto orderByCtx = getContext(); + NG_RETURN_IF_ERROR(validateOrderBy(ret, retClauseCtx, columns, *orderByCtx, queryParts)); + retClauseCtx.order = std::move(orderByCtx); + } + + // finished producing columns for further validations retClauseCtx.yield->yieldColumns = columns; NG_RETURN_IF_ERROR(validateAliases(exprs, retClauseCtx.yield->aliasesAvailable)); @@ -473,13 +481,6 @@ Status MatchValidator::validateReturn(MatchReturn *ret, NG_RETURN_IF_ERROR(validatePagination(ret->skip(), ret->limit(), *paginationCtx)); retClauseCtx.pagination = std::move(paginationCtx); - if (ret->orderFactors() != nullptr) { - auto orderByCtx = getContext(); - NG_RETURN_IF_ERROR( - validateOrderBy(ret->orderFactors(), retClauseCtx.yield->yieldColumns, *orderByCtx)); - retClauseCtx.order = std::move(orderByCtx); - } - return Status::OK(); } @@ -830,6 +831,79 @@ Status MatchValidator::validateOrderBy(const OrderFactors *factors, return Status::OK(); } +Status MatchValidator::validateOrderBy(MatchReturn *ret, + ReturnClauseContext &retClauseCtx, + YieldColumns *yieldColumns, + OrderByClauseContext &orderByCtx, + const std::vector &queryParts) const { + const OrderFactors *factors = ret->orderFactors(); + bool needsAndOkToExpandScope = !retClauseCtx.yield->hasAgg_ && !ret->isDistinct() && + !ret->returnItems()->allNamedAliases() && !queryParts.empty(); + + // if the columns required are not already in the projection columns, they may be found + // in in the columns before the projection + std::vector inputColList; + inputColList.reserve(yieldColumns->columns().size()); + for (auto *col : yieldColumns->columns()) { + inputColList.emplace_back(col->name()); + } + std::unordered_map inputColIndices; + for (auto i = 0u; i < inputColList.size(); i++) { + if (!inputColIndices.emplace(inputColList[i], i).second) { + return Status::SemanticError("Duplicated columns not allowed: %s", inputColList[i].c_str()); + } + } + YieldColumns *allQueryPartsColumns = nullptr; + for (auto &factor : factors->factors()) { + auto factorExpr = factor->expr(); + if (ExpressionUtils::isEvaluableExpr(factorExpr, qctx_)) continue; + if (factorExpr->kind() != Expression::Kind::kLabel) { + return Status::SemanticError("Only column name can be used as sort item"); + } + auto &name = static_cast(factor->expr())->name(); + auto iter = inputColIndices.find(name); + if (iter == inputColIndices.end()) { + // some column is not found + if (!needsAndOkToExpandScope) { + return Status::SemanticError("Column `%s' not found", name.c_str()); + } else { + // expand the scope to columns before the projection + // and see whether the required column could be found + bool found = false; + if (allQueryPartsColumns == nullptr) { + // expand + allQueryPartsColumns = retClauseCtx.qctx->objPool()->makeAndAdd(); + auto status = buildColumnsForAllNamedAliases(queryParts, allQueryPartsColumns); + if (!status.ok()) { + return status; + } + } + for (auto &col : allQueryPartsColumns->columns()) { + // check column + if (col->name().compare(name) != 0) { + continue; + } else { + col->setVisible(false); + yieldColumns->addColumn(col->clone().release()); + found = true; + orderByCtx.indexedOrderFactors.emplace_back(yieldColumns->size() - 1, + factor->orderType()); + break; + } + } + if (!found) { + return Status::SemanticError("Column `%s' not found", name.c_str()); + } + } + } else { + // iter found + orderByCtx.indexedOrderFactors.emplace_back(iter->second, factor->orderType()); + } + } + + return Status::OK(); +} + // Validate group by and fill group by context. Status MatchValidator::validateGroup(YieldClauseContext &yieldCtx) { auto cols = yieldCtx.yieldColumns->columns(); diff --git a/src/graph/validator/MatchValidator.h b/src/graph/validator/MatchValidator.h index b33feede7dc..ffb537f4a22 100644 --- a/src/graph/validator/MatchValidator.h +++ b/src/graph/validator/MatchValidator.h @@ -54,6 +54,12 @@ class MatchValidator final : public Validator { const YieldColumns *yieldColumns, OrderByClauseContext &orderByCtx) const; + Status validateOrderBy(MatchReturn *ret, + ReturnClauseContext &retClauseCtx, + YieldColumns *yieldColumns, + OrderByClauseContext &orderByCtx, + const std::vector &queryParts) const; + Status validateGroup(YieldClauseContext &yieldCtx); Status validateYield(YieldClauseContext &yieldCtx); diff --git a/src/parser/Clauses.h b/src/parser/Clauses.h index fd62eb20418..d4fd8350f9b 100644 --- a/src/parser/Clauses.h +++ b/src/parser/Clauses.h @@ -254,13 +254,14 @@ class WhenClause : public WhereClause { class YieldColumn final { public: - explicit YieldColumn(Expression *expr, const std::string &alias = "") { + explicit YieldColumn(Expression *expr, const std::string &alias = "", bool visible = true) { expr_ = expr; alias_ = alias; + visible_ = visible; } std::unique_ptr clone() const { - return std::make_unique(expr_->clone(), alias_); + return std::make_unique(expr_->clone(), alias_, visible_); } void setExpr(Expression *expr) { @@ -285,9 +286,21 @@ class YieldColumn final { std::string toString() const; + bool isVisible() const { + return visible_; + } + + void setVisible(bool visible) { + visible_ = visible; + } + private: Expression *expr_{nullptr}; std::string alias_; + // If some columns are required by order-by and are not explicitly + // declared in the return caluse, it is an invisible column and not to be + // produced as results. + bool visible_{true}; }; bool operator==(const YieldColumn &l, const YieldColumn &r); diff --git a/tests/tck/features/match/OrderBy.feature b/tests/tck/features/match/OrderBy.feature new file mode 100644 index 00000000000..52be1ef2f54 --- /dev/null +++ b/tests/tck/features/match/OrderBy.feature @@ -0,0 +1,139 @@ +# Copyright (c) 2020 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: OrderBy in Match + + Background: + Given a graph with space named "nba" + + Scenario: orderby hidden column + When executing query: + """ + match (v:player)-->(v1) + with v, v1, count(v1) as a0 + where a0 > 1 + return sum(v.player.age) + order by a0 + limit 5 + """ + Then a SemanticError should be raised at runtime: Column `a0' not found + When executing query: + """ + match (v:player)-->(v1) + with v, v1, count(v1) as a0 + where a0 > 1 + return distinct v + order by a0 + limit 5 + """ + Then a SemanticError should be raised at runtime: Column `a0' not found + When executing query: + """ + match (v:player)-->(v1:player) + with v, v1, v.player.name as name, v1.player.name as name2, count(v) as a0 + where a0 > 1 + return v, v1 + order by a0, name, name2 + """ + Then the result should be, in order: + | v | v1 | + | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | + + Scenario: topn hidden column + When executing query: + """ + match (v:player)-->(v1:player) + with v, v1, v.player.name as name, v1.player.name as name2, count(v) as a0 + where a0 > 0 + return name + order by a0 desc, name desc, name2 desc + limit 10 + """ + Then the result should be, in order: + | name | + | "Tony Parker" | + | "Tony Parker" | + | "Tony Parker" | + | "Tim Duncan" | + | "Tim Duncan" | + | "Manu Ginobili" | + | "Yao Ming" | + | "Yao Ming" | + | "Vince Carter" | + | "Vince Carter" | + When executing query: + """ + match (v:player)-->(v1:player) + with v, v1, v.player.name as name, v1.player.name as name2, count(v) as a0 + where a0 > 0 + return a0, name + order by a0 desc, name desc, name2 desc + limit 10 + """ + Then the result should be, in order: + | a0 | name | + | 2 | "Tony Parker" | + | 2 | "Tony Parker" | + | 2 | "Tony Parker" | + | 2 | "Tim Duncan" | + | 2 | "Tim Duncan" | + | 2 | "Manu Ginobili" | + | 1 | "Yao Ming" | + | 1 | "Yao Ming" | + | 1 | "Vince Carter" | + | 1 | "Vince Carter" | + + Scenario: orderby return column + When executing query: + """ + match (v:player)-->(v1:player) + with v, v1, v.player.age as age0, v1.player.age as age1, count(v) as a0 + where a0 > 1 + return age0, age1 + order by age0, age1 + """ + Then the result should be, in order: + | age0 | age1 | + | 36 | 33 | + | 36 | 41 | + | 36 | 42 | + | 41 | 42 | + | 42 | 36 | + | 42 | 41 | + When executing query: + """ + match (v:player)-->(v1:player) + with v, v1, v.player.name as name, v1.player.name as name2, count(v) as a0 + where a0 > 1 + return name, a0 + order by a0, name desc, name2 desc + """ + Then the result should be, in order: + | name | a0 | + | "Tony Parker" | 2 | + | "Tony Parker" | 2 | + | "Tony Parker" | 2 | + | "Tim Duncan" | 2 | + | "Tim Duncan" | 2 | + | "Manu Ginobili" | 2 | + When executing query: + """ + match (v:player)-->(v1:player) + with v.player.name as name, v1, v1.player.name as name2, count(v) as a0 + where a0 > 1 + return * + order by name, a0 desc, name2 desc + """ + Then the result should be, in order: + | name | v1 | name2 | a0 | + | "Manu Ginobili" | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | "Tim Duncan" | 2 | + | "Tim Duncan" | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | "Tony Parker" | 2 | + | "Tim Duncan" | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | "Manu Ginobili" | 2 | + | "Tony Parker" | ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | "Tim Duncan" | 2 | + | "Tony Parker" | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | "Manu Ginobili" | 2 | + | "Tony Parker" | ("LaMarcus Aldridge" :player{age: 33, name: "LaMarcus Aldridge"}) | "LaMarcus Aldridge" | 2 |