Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

Support lookup indexScan using IN expression as filter #1278

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion src/optimizer/OptimizerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,9 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
}

auto right = expr->right();
DCHECK(right->kind() == Expression::Kind::kConstant);
expr->kind() == Expression::Kind::kRelIn ? DCHECK(right->isContainerExpr())
: DCHECK(right->kind() == Expression::Kind::kConstant);

const auto& value = static_cast<const ConstantExpression*>(right)->value();

ScoredColumnHint hint;
Expand All @@ -663,6 +665,10 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
hint.score = IndexScore::kNotEqual;
break;
}
case Expression::Kind::kRelIn: {
// check the property has an index
break;
}
default: {
return Status::Error("Invalid expression kind");
}
Expand Down Expand Up @@ -913,6 +919,32 @@ bool OptimizerUtils::findOptimalIndex(const Expression* condition,
return true;
}

// Checks whether the property referenced by a relational expression is the leading
// field of at least one of the given indexes.
//
// @param expr        A relational expression (asserted with DCHECK). Its left operand
//                    must be either a kEdgeProperty or a kTagProperty expression.
// @param indexItems  Candidate indexes to test against.
// @return true if the first field name of some index equals the property name of the
//         left operand; false otherwise, including when indexItems is empty.
bool OptimizerUtils::relExprHasIndex(
    const Expression* expr,
    const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems) {
  DCHECK(expr->isRelExpr());

  if (indexItems.empty()) {
    return false;
  }

  // The left operand does not depend on the index being inspected, so resolve it once
  // instead of on every iteration.
  auto left = static_cast<const RelationalExpression*>(expr)->left();
  DCHECK(left->kind() == Expression::Kind::kEdgeProperty ||
         left->kind() == Expression::Kind::kTagProperty);
  auto propExpr = static_cast<const PropertyExpression*>(left);

  for (const auto& index : indexItems) {
    const auto& fields = index->get_fields();
    // An index without fields can never match, but a later index still might:
    // skip this one rather than aborting the whole search.
    if (fields.empty()) {
      continue;
    }

    if (propExpr->prop() == fields[0].get_name()) {
      return true;
    }
  }

  return false;
}

void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from,
nebula::graph::IndexScan* to) {
to->setEmptyResultSet(from->isEmptyResultSet());
Expand Down
4 changes: 4 additions & 0 deletions src/optimizer/OptimizerUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ class OptimizerUtils {
bool* isPrefixScan,
nebula::storage::cpp2::IndexQueryContext* ictx);

static bool relExprHasIndex(
const Expression* expr,
const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems);

static void copyIndexScanData(const nebula::graph::IndexScan* from,
nebula::graph::IndexScan* to);
};
Expand Down
16 changes: 15 additions & 1 deletion src/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "optimizer/rule/IndexScanRule.h"
#include "planner/plan/PlanNode.h"
#include "planner/plan/Scan.h"
#include "util/ExpressionUtils.h"

using nebula::graph::Filter;
using nebula::graph::OptimizerUtils;
Expand Down Expand Up @@ -46,6 +47,9 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const {
return pattern;
}

// Match 2 kinds of expressions:
// 1. Relational expr
// 2. Logical AND expr
bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
Expand All @@ -57,14 +61,24 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul
return false;
}
}

auto condition = filter->condition();
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
// If the container in the IN expr has only 1 element, the IN expr will be converted to a
// relEQ expr. If the container has more than 1 element, UnionAllIndexScanBaseRule will be
// applied instead.
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto ContainerOperands =
graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return ContainerOperands.size() == 1;
}

return relExpr->left()->kind() == ExprKind::kTagProperty &&
relExpr->right()->kind() == ExprKind::kConstant;
}
if (condition->isLogicalExpr()) {
return condition->kind() == Expression::Kind::kLogicalAnd;
return condition->kind() == ExprKind::kLogicalAnd;
}

return false;
Expand Down
109 changes: 95 additions & 14 deletions src/optimizer/rule/UnionAllIndexScanBaseRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "planner/plan/PlanNode.h"
#include "planner/plan/Query.h"
#include "planner/plan/Scan.h"
#include "util/ExpressionUtils.h"

using nebula::graph::Filter;
using nebula::graph::IndexScan;
Expand All @@ -24,25 +25,51 @@ using nebula::graph::TagIndexFullScan;
using nebula::storage::cpp2::IndexQueryContext;

using Kind = nebula::graph::PlanNode::Kind;
using ExprKind = nebula::Expression::Kind;
using TransformResult = nebula::opt::OptRule::TransformResult;

namespace nebula {
namespace opt {

// The matched expression should be either an OR expression or an expression that can be
// rewritten to an OR expression. There are 3 scenarios:
// 1. OR expr. If the OR expr has an IN expr operand with a valid index, expand it to an OR expr.
// 2. AND expr such as A in [a, b] AND B, because it can be transformed to (A==a AND B) OR (A==b
// AND B)
// 3. IN expr such as A in [a, b] since it can be transformed to (A==a) OR (A==b)
bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
}
auto filter = static_cast<const Filter*>(matched.planNode());
auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
auto condition = filter->condition();
if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) {
return false;
auto conditionType = condition->kind();

if (condition->isLogicalExpr()) {
if (conditionType == ExprKind::kLogicalOr) {
return true;
}
if (conditionType == ExprKind::kLogicalAnd &&
graph::ExpressionUtils::findAny(static_cast<LogicalExpression*>(condition),
{ExprKind::kRelIn})) {
return true;
}
// Check logical operands
for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr() || !operand->isLogicalExpr()) {
return false;
}
}
}

for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr()) {
return false;
// If the number of elements is less than or equal to 1, the IN expr will be transformed into a
// relEQ expr by the OptimizeTagIndexScanByFilterRule.
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return operandsVec.size() > 1;
}
}

Expand All @@ -52,30 +79,84 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc
}
}

return true;
return false;
}

// If the IN expr has only 1 element in its container, it will be converted to an relEQ expr
StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
const MatchedResult& matched) const {
auto filter = static_cast<const Filter*>(matched.planNode());
auto node = matched.planNode({0, 0});
auto scan = static_cast<const IndexScan*>(node);

auto metaClient = ctx->qctx()->getMetaClient();
StatusOr<std::vector<std::shared_ptr<meta::cpp2::IndexItem>>> status;
if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) {
status = metaClient->getTagIndexesFromCache(scan->space());
} else {
status = metaClient->getEdgeIndexesFromCache(scan->space());
}
auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan
? metaClient->getTagIndexesFromCache(scan->space())
: metaClient->getEdgeIndexesFromCache(scan->space());
NG_RETURN_IF_ERROR(status);
auto indexItems = std::move(status).value();

OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);

// Check whether the prop has index.
// Rewrite if the property in the IN expr has a valid index
if (indexItems.empty()) {
return TransformResult::noTransform();
}

auto condition = filter->condition();
auto conditionType = condition->kind();
Expression* transformedExpr = condition->clone();

// Stand alone IN expr
if (conditionType == ExprKind::kRelIn) {
if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) {
return TransformResult::noTransform();
}
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
}

// AND expr containing IN expr operand
if (conditionType == ExprKind::kLogicalAnd) {
auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn});
DCHECK(!relInExprs.empty());
bool indexFound = false;
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(transformedExpr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}
if (!indexFound) {
return TransformResult::noTransform();
}

// Reconstruct AND expr using distributive law
}

// OR expr
if (conditionType == ExprKind::kLogicalOr) {
auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn});
if (!relInExprs.empty()) {
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}
// Flatten OR exprs
graph::ExpressionUtils::pullOrs(transformedExpr);
}
}

DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr);
std::vector<IndexQueryContext> idxCtxs;
auto condition = static_cast<const LogicalExpression*>(filter->condition());
for (auto operand : condition->operands()) {
auto logicalExpr = static_cast<const LogicalExpression*>(transformedExpr);
for (auto operand : logicalExpr->operands()) {
IndexQueryContext ictx;
bool isPrefixScan = false;
if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) {
Expand Down
55 changes: 55 additions & 0 deletions src/util/ExpressionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,61 @@ Expression *ExpressionUtils::rewriteAgg2VarProp(const Expression *expr) {
return RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter));
}

// Expands an IN expression into equality comparisons.
//   - Container with exactly 1 element:  A IN [B]        =>  A == B
//   - Any other container size:          A IN [B, C, D]  =>  (A == B) OR (A == C) OR (A == D)
// The input must be of kind kRelIn (asserted with DCHECK). The input is cloned before
// being inspected, and the result is built from the operands of that clone.
Expression *ExpressionUtils::rewriteInExpr(const Expression *expr) {
  DCHECK(expr->kind() == Expression::Kind::kRelIn);
  auto objPool = expr->getObjPool();
  auto relIn = static_cast<RelationalExpression *>(expr->clone());
  auto candidates = getContainerExprOperands(relIn->right());
  const auto candidateCount = candidates.size();

  // A single candidate needs no logical wrapper: emit the bare equality.
  if (candidateCount == 1) {
    return RelationalExpression::makeEQ(objPool, relIn->left(), candidates[0]);
  }

  // One equality per container element, in container order.
  std::vector<Expression *> equalities;
  equalities.reserve(candidateCount);
  for (decltype(candidates.size()) i = 0; i < candidateCount; ++i) {
    equalities.emplace_back(RelationalExpression::makeEQ(objPool, relIn->left(), candidates[i]));
  }

  auto disjunction = LogicalExpression::makeOr(objPool);
  disjunction->setOperands(equalities);
  return disjunction;
}

std::vector<Expression *> ExpressionUtils::getContainerExprOperands(const Expression *expr) {
DCHECK(expr->isContainerExpr());
auto pool = expr->getObjPool();
auto containerExpr = expr->clone();

std::vector<Expression *> containerOperands;
switch (containerExpr->kind()) {
case Expression::Kind::kList:
containerOperands = static_cast<ListExpression *>(containerExpr)->get();
break;
case Expression::Kind::kSet: {
containerOperands = static_cast<SetExpression *>(containerExpr)->get();
break;
}
case Expression::Kind::kMap: {
auto mapItems = static_cast<MapExpression *>(containerExpr)->get();
// iterate map and add key into containerOperands
for (auto &item : mapItems) {
containerOperands.emplace_back(
ConstantExpression::make(pool, std::move(item.first)));
}
break;
}
default:
LOG(FATAL) << "Invalid expression type " << containerExpr->kind();
}
return containerOperands;
}

StatusOr<Expression *> ExpressionUtils::foldConstantExpr(const Expression *expr) {
ObjectPool* objPool = expr->getObjPool();
auto newExpr = expr->clone();
Expand Down
12 changes: 12 additions & 0 deletions src/util/ExpressionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,25 @@ class ExpressionUtils {
static Expression* rewriteRelExprHelper(const Expression* expr,
Expression*& relRightOperandExpr);

// Rewrite IN expression into OR expression or relEQ expression
static Expression* rewriteInExpr(const Expression* expr);

// Return the operands of container expressions
// For list and set, return the operands
// For map, return the keys
static std::vector<Expression*> getContainerExprOperands(const Expression* expr);

// Clone and fold constant expression
static StatusOr<Expression*> foldConstantExpr(const Expression* expr);

// Clone and reduce unaryNot expression
static Expression* reduceUnaryNotExpr(const Expression* expr);

// Transform filter using multiple expression rewrite strategies
// 1. rewrite relational expressions containing arithmetic operands so that
// all constants are on the right side of relExpr.
// 2. fold constant
// 3. reduce unary expression e.g. !(A and B) => !A or !B
static StatusOr<Expression*> filterTransform(const Expression* expr);

// Negate the given logical expr: (A && B) -> (!A || !B)
Expand Down
Loading