Skip to content

Commit

Permalink
Support lookup indexScan using IN expression as filter (#2906)
Browse files Browse the repository at this point in the history
* Move PR from nebula-graph

* Add method to convert AND expr to OR expr

* Fix rewriteLogicalAndToLogicalOr() and clear tests

* Fix tests

Fix compilation

* Forbid using string-related relational expressions as the filter of LOOKUP except STARTS/NOT STARTS WITH

* Address comments

* Add uts for rewriteInExpr() and rewriteLogicalAndToLogicalOr()

* Check foldability of container expr in VidExtractVisitor
  • Loading branch information
Aiee authored Sep 30, 2021
1 parent 2d02462 commit dd2a7d2
Show file tree
Hide file tree
Showing 20 changed files with 891 additions and 411 deletions.
33 changes: 32 additions & 1 deletion src/graph/optimizer/OptimizerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,12 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
}

auto right = expr->right();
DCHECK(right->kind() == Expression::Kind::kConstant);
if (expr->kind() == Expression::Kind::kRelIn) { // container expressions
DCHECK(right->isContainerExpr());
} else { // other expressions
DCHECK(right->kind() == Expression::Kind::kConstant);
}

const auto& value = static_cast<const ConstantExpression*>(right)->value();

ScoredColumnHint hint;
Expand Down Expand Up @@ -917,6 +922,32 @@ bool OptimizerUtils::findOptimalIndex(const Expression* condition,
return true;
}

// Check if the relational expression has a valid index, i.e. whether the property on its
// left-hand side is the first field of at least one of the given indexes.
//
// The left operand should either be a kEdgeProperty or kTagProperty expr (enforced with DCHECK
// only).
//
// expr:       a relational expression (enforced with DCHECK only).
// indexItems: the candidate indexes to search.
// Returns true if some index in indexItems has the property as its first field, false otherwise
// (including when indexItems is empty).
bool OptimizerUtils::relExprHasIndex(
    const Expression* expr,
    const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems) {
  DCHECK(expr->isRelExpr());

  for (const auto& index : indexItems) {
    const auto& fields = index->get_fields();
    // An index without any field can never match the property, but a later index in the list
    // still may, so skip this one instead of giving up on the whole search.
    if (fields.empty()) {
      continue;
    }

    auto left = static_cast<const RelationalExpression*>(expr)->left();
    DCHECK(left->kind() == Expression::Kind::kEdgeProperty ||
           left->kind() == Expression::Kind::kTagProperty);

    // Only the leading field of the index is compared against the property name.
    auto propExpr = static_cast<const PropertyExpression*>(left);
    if (propExpr->prop() == fields[0].get_name()) {
      return true;
    }
  }

  return false;
}

void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from,
nebula::graph::IndexScan* to) {
to->setEmptyResultSet(from->isEmptyResultSet());
Expand Down
4 changes: 4 additions & 0 deletions src/graph/optimizer/OptimizerUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ class OptimizerUtils {
bool* isPrefixScan,
nebula::storage::cpp2::IndexQueryContext* ictx);

// Checks whether the property on the left-hand side of the relational expression `expr` is the
// first field of one of the indexes in `indexItems`.
// The left operand of `expr` is expected to be a tag or edge property expression.
static bool relExprHasIndex(
const Expression* expr,
const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems);

static void copyIndexScanData(const nebula::graph::IndexScan* from, nebula::graph::IndexScan* to);
};

Expand Down
55 changes: 50 additions & 5 deletions src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const {
return pattern;
}

// Match 2 kinds of expressions:
//
// 1. Relational expr. If it is an IN expr, its list MUST have only 1 element, so it could always be
// transformed to a relEQ expr, e.g. A in [B] => A == B
// If the list has more than 1 element, the expr will be matched with UnionAllIndexScanBaseRule.
//
// 2. Logical AND expr. If the AND expr contains an operand that is an IN expr, the label attribute
// in the IN expr SHOULD NOT have a valid index, otherwise the expression should be matched with
// UnionAllIndexScanBaseRule.
bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
Expand All @@ -58,16 +67,23 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul
}
}
auto condition = filter->condition();

// Case1: relational expr
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
// If the container in the IN expr has only 1 element, it will be converted to a relEQ
// expr. If more than 1 element is found in the container, UnionAllIndexScanBaseRule will be
// applied.
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto ContainerOperands = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return ContainerOperands.size() == 1;
}
return relExpr->left()->kind() == ExprKind::kTagProperty &&
relExpr->right()->kind() == ExprKind::kConstant;
}
if (condition->isLogicalExpr()) {
return condition->kind() == Expression::Kind::kLogicalAnd;
}

return false;
// Case2: logical AND expr
return condition->kind() == ExprKind::kLogicalAnd;
}

TagIndexScan* makeTagIndexScan(QueryContext* qctx, const TagIndexScan* scan, bool isPrefixScan) {
Expand All @@ -94,9 +110,38 @@ StatusOr<TransformResult> OptimizeTagIndexScanByFilterRule::transform(

OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);

auto condition = filter->condition();
auto conditionType = condition->kind();
Expression* transformedExpr = condition->clone();

// Stand alone IN expr with only 1 element in the list, no need to check index
if (conditionType == ExprKind::kRelIn) {
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
DCHECK(transformedExpr->kind() == ExprKind::kRelEQ);
}

// case2: logical AND expr
if (condition->kind() == ExprKind::kLogicalAnd) {
for (auto& operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (operand->kind() == ExprKind::kRelIn) {
auto inExpr = static_cast<RelationalExpression*>(operand);
// Do not apply this rule if the IN expr has a valid index or it has more than 1 element in
// the list
if (static_cast<ListExpression*>(inExpr->right())->size() > 1) {
return TransformResult::noTransform();
} else {
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
}
if (OptimizerUtils::relExprHasIndex(inExpr, indexItems)) {
return TransformResult::noTransform();
}
}
}
}

IndexQueryContext ictx;
bool isPrefixScan = false;
if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) {
if (!OptimizerUtils::findOptimalIndex(transformedExpr, indexItems, &isPrefixScan, &ictx)) {
return TransformResult::noTransform();
}

Expand Down
119 changes: 105 additions & 14 deletions src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "graph/planner/plan/PlanNode.h"
#include "graph/planner/plan/Query.h"
#include "graph/planner/plan/Scan.h"
#include "graph/util/ExpressionUtils.h"
#include "interface/gen-cpp2/storage_types.h"

using nebula::graph::Filter;
Expand All @@ -24,25 +25,58 @@ using nebula::graph::TagIndexFullScan;
using nebula::storage::cpp2::IndexQueryContext;

using Kind = nebula::graph::PlanNode::Kind;
using ExprKind = nebula::Expression::Kind;
using TransformResult = nebula::opt::OptRule::TransformResult;

namespace nebula {
namespace opt {

// The matched expression should be either an OR expression or an expression that could be
// rewritten to an OR expression. There are 3 scenarios.
//
// 1. OR expr. If OR expr has an IN expr operand that has a valid index, expand it to OR expr.
//
// 2. AND expr such as A in [a, b] AND B when A has a valid index, because it can be transformed to
// (A==a AND B) OR (A==b AND B)
//
// 3. IN expr with its list size > 1, such as A in [a, b] since it can be transformed to (A==a) OR
// (A==b).
// If the list has a size of 1, the expr will be matched with OptimizeTagIndexScanByFilterRule.
bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
}
auto filter = static_cast<const Filter*>(matched.planNode());
auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
auto condition = filter->condition();
if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) {
return false;
auto conditionType = condition->kind();

if (condition->isLogicalExpr()) {
// Case1: OR Expr
if (conditionType == ExprKind::kLogicalOr) {
return true;
}
// Case2: AND Expr
if (conditionType == ExprKind::kLogicalAnd &&
graph::ExpressionUtils::findAny(static_cast<LogicalExpression*>(condition),
{ExprKind::kRelIn})) {
return true;
}
// Check logical operands
for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr() || !operand->isLogicalExpr()) {
return false;
}
}
}

for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr()) {
return false;
// If the number of elements is less than or equal to 1, the IN expr will be transformed into a
// relEQ expr by the OptimizeTagIndexScanByFilterRule.
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return operandsVec.size() > 1;
}
}

Expand All @@ -52,7 +86,7 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc
}
}

return true;
return false;
}

StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
Expand All @@ -62,20 +96,77 @@ StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
auto scan = static_cast<const IndexScan*>(node);

auto metaClient = ctx->qctx()->getMetaClient();
StatusOr<std::vector<std::shared_ptr<meta::cpp2::IndexItem>>> status;
if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) {
status = metaClient->getTagIndexesFromCache(scan->space());
} else {
status = metaClient->getEdgeIndexesFromCache(scan->space());
}
auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan
? metaClient->getTagIndexesFromCache(scan->space())
: metaClient->getEdgeIndexesFromCache(scan->space());

NG_RETURN_IF_ERROR(status);
auto indexItems = std::move(status).value();

OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);

// Check whether the prop has index.
// Rewrite if the property in the IN expr has a valid index
if (indexItems.empty()) {
return TransformResult::noTransform();
}

auto condition = filter->condition();
auto conditionType = condition->kind();
Expression* transformedExpr = condition->clone();

switch (conditionType) {
// Stand alone IN expr
// If it has multiple elements in the list, check valid index before expanding to OR expr
case ExprKind::kRelIn: {
if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) {
return TransformResult::noTransform();
}
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
break;
}

// AND expr containing IN expr operand
case ExprKind::kLogicalAnd: {
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}

// Reconstruct AND expr using distributive law
transformedExpr = graph::ExpressionUtils::rewriteLogicalAndToLogicalOr(transformedExpr);
break;
}

// OR expr
case ExprKind::kLogicalOr: {
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}
// Flatten OR exprs
graph::ExpressionUtils::pullOrs(transformedExpr);

break;
}
default:
LOG(FATAL) << "Invalid expression kind: " << static_cast<uint8_t>(conditionType);
break;
}

DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr ||
transformedExpr->kind() == ExprKind::kRelEQ);
std::vector<IndexQueryContext> idxCtxs;
auto condition = static_cast<const LogicalExpression*>(filter->condition());
for (auto operand : condition->operands()) {
auto logicalExpr = static_cast<const LogicalExpression*>(transformedExpr);
for (auto operand : logicalExpr->operands()) {
IndexQueryContext ictx;
bool isPrefixScan = false;
if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) {
Expand Down
Loading

0 comments on commit dd2a7d2

Please sign in to comment.