Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

Commit

Permalink
Refactor the index choosing logic for IN expr
Browse files Browse the repository at this point in the history
  • Loading branch information
Aiee committed Jul 30, 2021
1 parent e68b136 commit 1542f72
Show file tree
Hide file tree
Showing 9 changed files with 214 additions and 88 deletions.
34 changes: 33 additions & 1 deletion src/optimizer/OptimizerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,9 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
}

auto right = expr->right();
DCHECK(right->kind() == Expression::Kind::kConstant);
expr->kind() == Expression::Kind::kRelIn ? DCHECK(right->isContainerExpr())
: DCHECK(right->kind() == Expression::Kind::kConstant);

const auto& value = static_cast<const ConstantExpression*>(right)->value();

ScoredColumnHint hint;
Expand All @@ -663,6 +665,10 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
hint.score = IndexScore::kNotEqual;
break;
}
case Expression::Kind::kRelIn: {
// check the property has an index
break;
}
default: {
return Status::Error("Invalid expression kind");
}
Expand Down Expand Up @@ -913,6 +919,32 @@ bool OptimizerUtils::findOptimalIndex(const Expression* condition,
return true;
}

// Check whether the property referenced by a relational expression is the leading field of
// at least one of the given indexes.
//
// @param expr        A relational expression (asserted via DCHECK). Its left operand is
//                    expected to be either a kEdgeProperty or a kTagProperty expression.
// @param indexItems  Candidate indexes to search.
// @return true if the first field of some index has the same name as the property on the
//         left-hand side of `expr`; false otherwise (including when `indexItems` is empty).
bool OptimizerUtils::relExprHasIndex(
    const Expression* expr,
    const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems) {
  DCHECK(expr->isRelExpr());

  // The left operand does not depend on the index being inspected, so resolve it once
  // instead of re-casting on every iteration.
  auto left = static_cast<const RelationalExpression*>(expr)->left();
  DCHECK(left->kind() == Expression::Kind::kEdgeProperty ||
         left->kind() == Expression::Kind::kTagProperty);
  auto propExpr = static_cast<const PropertyExpression*>(left);

  for (const auto& index : indexItems) {
    const auto& fields = index->get_fields();
    // An index without fields cannot match, but it must not stop the search either:
    // a later index may still lead with the property we are looking for.
    if (fields.empty()) {
      continue;
    }

    if (propExpr->prop() == fields[0].get_name()) {
      return true;
    }
  }

  return false;
}

void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from,
nebula::graph::IndexScan* to) {
to->setEmptyResultSet(from->isEmptyResultSet());
Expand Down
4 changes: 4 additions & 0 deletions src/optimizer/OptimizerUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ class OptimizerUtils {
bool* isPrefixScan,
nebula::storage::cpp2::IndexQueryContext* ictx);

static bool relExprHasIndex(
const Expression* expr,
const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems);

static void copyIndexScanData(const nebula::graph::IndexScan* from,
nebula::graph::IndexScan* to);
};
Expand Down
16 changes: 15 additions & 1 deletion src/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "optimizer/rule/IndexScanRule.h"
#include "planner/plan/PlanNode.h"
#include "planner/plan/Scan.h"
#include "util/ExpressionUtils.h"

using nebula::graph::Filter;
using nebula::graph::OptimizerUtils;
Expand Down Expand Up @@ -46,6 +47,9 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const {
return pattern;
}

// Match 2 kinds of expressions:
// 1. Relational expr
// 2. Logical AND expr
bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
Expand All @@ -57,14 +61,24 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul
return false;
}
}

auto condition = filter->condition();
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
// If the container in the IN expr has only 1 element, it will be converted to a relEQ
// expr. If more than 1 element is found in the container, UnionAllIndexScanBaseRule will be
// applied.
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto ContainerOperands =
graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return ContainerOperands.size() == 1;
}

return relExpr->left()->kind() == ExprKind::kTagProperty &&
relExpr->right()->kind() == ExprKind::kConstant;
}
if (condition->isLogicalExpr()) {
return condition->kind() == Expression::Kind::kLogicalAnd;
return condition->kind() == ExprKind::kLogicalAnd;
}

return false;
Expand Down
109 changes: 95 additions & 14 deletions src/optimizer/rule/UnionAllIndexScanBaseRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "planner/plan/PlanNode.h"
#include "planner/plan/Query.h"
#include "planner/plan/Scan.h"
#include "util/ExpressionUtils.h"

using nebula::graph::Filter;
using nebula::graph::IndexScan;
Expand All @@ -24,25 +25,51 @@ using nebula::graph::TagIndexFullScan;
using nebula::storage::cpp2::IndexQueryContext;

using Kind = nebula::graph::PlanNode::Kind;
using ExprKind = nebula::Expression::Kind;
using TransformResult = nebula::opt::OptRule::TransformResult;

namespace nebula {
namespace opt {

// The matched expression should be either an OR expression or an expression that could be
// rewritten to an OR expression. There are 3 scenarios.
// 1. OR expr. If the OR expr has an IN expr operand that has a valid index, expand it to an OR expr.
// 2. AND expr such as A in [a, b] AND B, because it can be transformed to (A==a AND B) OR (A==b
// AND B)
// 3. IN expr such as A in [a, b] since it can be transformed to (A==a) OR (A==b)
bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
}
auto filter = static_cast<const Filter*>(matched.planNode());
auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
auto condition = filter->condition();
if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) {
return false;
auto conditionType = condition->kind();

if (condition->isLogicalExpr()) {
if (conditionType == ExprKind::kLogicalOr) {
return true;
}
if (conditionType == ExprKind::kLogicalAnd &&
graph::ExpressionUtils::findAny(static_cast<LogicalExpression*>(condition),
{ExprKind::kRelIn})) {
return true;
}
// Check logical operands
for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr() || !operand->isLogicalExpr()) {
return false;
}
}
}

for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr()) {
return false;
// If the number of elements is less than or equal to 1, the IN expr will be transformed into a
// relEQ expr by the OptimizeTagIndexScanByFilterRule.
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return operandsVec.size() > 1;
}
}

Expand All @@ -52,30 +79,84 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc
}
}

return true;
return false;
}

// If the IN expr has only 1 element in its container, it will be converted to a relEQ expr
StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
const MatchedResult& matched) const {
auto filter = static_cast<const Filter*>(matched.planNode());
auto node = matched.planNode({0, 0});
auto scan = static_cast<const IndexScan*>(node);

auto metaClient = ctx->qctx()->getMetaClient();
StatusOr<std::vector<std::shared_ptr<meta::cpp2::IndexItem>>> status;
if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) {
status = metaClient->getTagIndexesFromCache(scan->space());
} else {
status = metaClient->getEdgeIndexesFromCache(scan->space());
}
auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan
? metaClient->getTagIndexesFromCache(scan->space())
: metaClient->getEdgeIndexesFromCache(scan->space());
NG_RETURN_IF_ERROR(status);
auto indexItems = std::move(status).value();

OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);

// Check whether the prop has index.
// Rewrite if the property in the IN expr has a valid index
if (indexItems.empty()) {
return TransformResult::noTransform();
}

auto condition = filter->condition();
auto conditionType = condition->kind();
Expression* transformedExpr = condition->clone();

// Stand alone IN expr
if (conditionType == ExprKind::kRelIn) {
if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) {
return TransformResult::noTransform();
}
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
}

// AND expr containing IN expr operand
if (conditionType == ExprKind::kLogicalAnd) {
auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn});
DCHECK(!relInExprs.empty());
bool indexFound = false;
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(transformedExpr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}
if (!indexFound) {
return TransformResult::noTransform();
}

// Reconstruct AND expr using distributive law
}

// OR expr
if (conditionType == ExprKind::kLogicalOr) {
auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn});
if (!relInExprs.empty()) {
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}
// Flatten OR exprs
graph::ExpressionUtils::pullOrs(transformedExpr);
}
}

DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr);
std::vector<IndexQueryContext> idxCtxs;
auto condition = static_cast<const LogicalExpression*>(filter->condition());
for (auto operand : condition->operands()) {
auto logicalExpr = static_cast<const LogicalExpression*>(transformedExpr);
for (auto operand : logicalExpr->operands()) {
IndexQueryContext ictx;
bool isPrefixScan = false;
if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) {
Expand Down
55 changes: 55 additions & 0 deletions src/util/ExpressionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,61 @@ Expression *ExpressionUtils::rewriteAgg2VarProp(const Expression *expr) {
return RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter));
}

// Expand an IN expression into equality comparisons against each container element:
//   A IN [B]       =>  A == B
//   A IN [B, C, D] =>  (A == B) OR (A == C) OR (A == D)
// The input must be of kind kRelIn (asserted via DCHECK). The input is cloned and the
// result is built from the clone's left operand and its container operands.
Expression *ExpressionUtils::rewriteInExpr(const Expression *expr) {
  DCHECK(expr->kind() == Expression::Kind::kRelIn);
  auto objPool = expr->getObjPool();
  auto clonedIn = static_cast<RelationalExpression *>(expr->clone());
  auto elements = getContainerExprOperands(clonedIn->right());
  const auto elementCount = elements.size();

  // A single candidate value needs no logical wrapper: emit the equality directly.
  if (elementCount == 1) {
    return RelationalExpression::makeEQ(objPool, clonedIn->left(), elements[0]);
  }

  // One equality comparison per container element, all against the same left operand.
  std::vector<Expression *> equalities;
  equalities.reserve(elementCount);
  for (auto it = elements.begin(); it != elements.end(); ++it) {
    equalities.emplace_back(RelationalExpression::makeEQ(objPool, clonedIn->left(), *it));
  }

  // Combine the comparisons under a single OR node.
  auto disjunction = LogicalExpression::makeOr(objPool);
  disjunction->setOperands(equalities);
  return disjunction;
}

std::vector<Expression *> ExpressionUtils::getContainerExprOperands(const Expression *expr) {
DCHECK(expr->isContainerExpr());
auto pool = expr->getObjPool();
auto containerExpr = expr->clone();

std::vector<Expression *> containerOperands;
switch (containerExpr->kind()) {
case Expression::Kind::kList:
containerOperands = static_cast<ListExpression *>(containerExpr)->get();
break;
case Expression::Kind::kSet: {
containerOperands = static_cast<SetExpression *>(containerExpr)->get();
break;
}
case Expression::Kind::kMap: {
auto mapItems = static_cast<MapExpression *>(containerExpr)->get();
// iterate map and add key into containerOperands
for (auto &item : mapItems) {
containerOperands.emplace_back(
ConstantExpression::make(pool, std::move(item.first)));
}
break;
}
default:
LOG(FATAL) << "Invalid expression type " << containerExpr->kind();
}
return containerOperands;
}

StatusOr<Expression *> ExpressionUtils::foldConstantExpr(const Expression *expr) {
ObjectPool* objPool = expr->getObjPool();
auto newExpr = expr->clone();
Expand Down
8 changes: 8 additions & 0 deletions src/util/ExpressionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ class ExpressionUtils {
static Expression* rewriteRelExprHelper(const Expression* expr,
Expression*& relRightOperandExpr);

// Rewrite IN expression into OR expression or relEQ expression
static Expression* rewriteInExpr(const Expression* expr);

// Return the operands of container expressions
// For list and set, return the operands
// For map, return the keys
static std::vector<Expression*> getContainerExprOperands(const Expression* expr);

// Clone and fold constant expression
static StatusOr<Expression*> foldConstantExpr(const Expression* expr);

Expand Down
Loading

0 comments on commit 1542f72

Please sign in to comment.