diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp index 8ecb4fa59ae..95da9844dad 100644 --- a/src/graph/optimizer/OptimizerUtils.cpp +++ b/src/graph/optimizer/OptimizerUtils.cpp @@ -272,8 +272,8 @@ std::vector collectUnusedExpr( return unusedOperands; } -StatusOr selectLogicalExprIndex(const LogicalExpression* expr, - const IndexItem& index) { +StatusOr selectLogicalAndExprIndex(const LogicalExpression* expr, + const IndexItem& index) { if (expr->kind() != Expression::Kind::kLogicalAnd) { return Status::Error("Invalid expression kind."); } @@ -304,8 +304,8 @@ StatusOr selectIndex(const Expression* expr, const IndexItem& index return selectRelExprIndex(static_cast(expr), index); } - if (expr->isLogicalExpr()) { - return selectLogicalExprIndex(static_cast(expr), index); + if (expr->kind() == Expression::Kind::kLogicalAnd) { + return selectLogicalAndExprIndex(static_cast(expr), index); } return Status::Error("Invalid expression kind."); diff --git a/src/graph/optimizer/rule/OptimizeEdgeIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeEdgeIndexScanByFilterRule.cpp index 8b4fb01aa39..922a188ea98 100644 --- a/src/graph/optimizer/rule/OptimizeEdgeIndexScanByFilterRule.cpp +++ b/src/graph/optimizer/rule/OptimizeEdgeIndexScanByFilterRule.cpp @@ -105,7 +105,6 @@ StatusOr OptimizeEdgeIndexScanByFilterRule::transform( // Stand alone IN expr with only 1 element in the list, no need to check index if (conditionType == ExprKind::kRelIn) { transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); - DCHECK(transformedExpr->kind() == ExprKind::kRelEQ); } // case2: logical AND expr diff --git a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp index 56310e3aba1..64386f783d8 100644 --- a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp +++ b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp @@ -72,9 +72,15 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul // If the container in the IN expr has only 1 element, it will be converted to an relEQ // expr. If more than 1 element found in the container, UnionAllIndexScanBaseRule will be // applied. - if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) { - auto ContainerOperands = graph::ExpressionUtils::getContainerExprOperands(relExpr->right()); - return ContainerOperands.size() == 1; + auto* rhs = relExpr->right(); + if (relExpr->kind() == ExprKind::kRelIn) { + if (rhs->isContainerExpr()) { + return graph::ExpressionUtils::getContainerExprOperands(relExpr->right()).size() == 1; + } else if (rhs->kind() == Expression::Kind::kConstant) { + auto constExprValue = static_cast(rhs)->value(); + return (constExprValue.isList() && constExprValue.getList().size() == 1) || + (constExprValue.isSet() && constExprValue.getSet().size() == 1); + } } return relExpr->left()->kind() == ExprKind::kTagProperty && relExpr->right()->kind() == ExprKind::kConstant; @@ -115,11 +121,10 @@ StatusOr OptimizeTagIndexScanByFilterRule::transform( // Stand alone IN expr with only 1 element in the list, no need to check index if (conditionType == ExprKind::kRelIn) { transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); - DCHECK(transformedExpr->kind() == ExprKind::kRelEQ); + } else if (conditionType == ExprKind::kStartsWith) { // StartsWith expr is converted to a RelAnd expr with a constant value transformedExpr = graph::ExpressionUtils::rewriteStartsWithExpr(condition); - DCHECK(transformedExpr->kind() == ExprKind::kLogicalAnd); } // case2: logical AND expr @@ -130,15 +135,23 @@ StatusOr OptimizeTagIndexScanByFilterRule::transform( auto inExpr = static_cast(operand); // Do not apply this rule if the IN expr has a valid index or it has more than 1 element in // the list - if (static_cast(inExpr->right())->size() > 1) { + auto* rhs = inExpr->right(); + if (rhs->isContainerExpr() && + graph::ExpressionUtils::getContainerExprOperands(rhs).size() > 1) { return TransformResult::noTransform(); - } else { - // If the inner IN expr has only 1 element, rewrite it to an relEQ expression and there is - // no need to check whether it has a index - auto relEqExpr = graph::ExpressionUtils::rewriteInExpr(inExpr); - static_cast(transformedExpr)->setOperand(operandIndex, relEqExpr); - continue; + } else if (rhs->kind() == Expression::Kind::kConstant) { + auto constExprValue = static_cast(rhs)->value(); + if ((constExprValue.isList() && constExprValue.getList().size() > 1) || + (constExprValue.isSet() && constExprValue.getSet().size() > 1)) { + return TransformResult::noTransform(); + } } + // If the inner IN expr has only 1 element, rewrite it to an relEQ expression and there is + // no need to check whether it has a index + auto relEqExpr = graph::ExpressionUtils::rewriteInExpr(inExpr); + static_cast(transformedExpr)->setOperand(operandIndex, relEqExpr); + continue; + if (OptimizerUtils::relExprHasIndex(inExpr, indexItems)) { return TransformResult::noTransform(); } diff --git a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp index b3f18a473d1..645bb6794c8 100644 --- a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp @@ -70,9 +70,15 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc // relEQ expr by the OptimizeTagIndexScanByFilterRule. if (condition->isRelExpr()) { auto relExpr = static_cast(condition); - if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) { - auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right()); - return operandsVec.size() > 1; + auto* rhs = relExpr->right(); + if (relExpr->kind() == ExprKind::kRelIn) { + if (rhs->isContainerExpr()) { + return graph::ExpressionUtils::getContainerExprOperands(rhs).size() > 1; + } else if (rhs->kind() == Expression::Kind::kConstant) { + auto constExprValue = static_cast(rhs)->value(); + return (constExprValue.isList() && constExprValue.getList().size() > 1) || + (constExprValue.isSet() && constExprValue.getSet().size() > 1); + } } } diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 8e10fe57441..4dee63c1bcb 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -320,22 +320,45 @@ Expression *ExpressionUtils::rewriteInExpr(const Expression *expr) { DCHECK(expr->kind() == Expression::Kind::kRelIn); auto pool = expr->getObjPool(); auto inExpr = static_cast(expr->clone()); - auto containerOperands = getContainerExprOperands(inExpr->right()); + auto orExpr = LogicalExpression::makeOr(pool); + if (inExpr->right()->isContainerExpr()) { + auto containerOperands = getContainerExprOperands(inExpr->right()); - auto operandSize = containerOperands.size(); - // container has only 1 element, no need to transform to logical expression - if (operandSize == 1) { - return RelationalExpression::makeEQ(pool, inExpr->left(), containerOperands[0]); - } + auto operandSize = containerOperands.size(); + // container has only 1 element, no need to transform to logical expression + if (operandSize == 1) { + return RelationalExpression::makeEQ(pool, inExpr->left(), containerOperands[0]); + } - std::vector orExprOperands; - orExprOperands.reserve(operandSize); - // A in [B, C, D] => (A == B) or (A == C) or (A == D) - for (auto *operand : containerOperands) { - orExprOperands.emplace_back(RelationalExpression::makeEQ(pool, inExpr->left(), operand)); + std::vector orExprOperands; + orExprOperands.reserve(operandSize); + // A in [B, C, D] => (A == B) or (A == C) or (A == D) + for (auto *operand : containerOperands) { + orExprOperands.emplace_back(RelationalExpression::makeEQ(pool, inExpr->left(), operand)); + } + orExpr->setOperands(orExprOperands); + } else if (inExpr->right()->kind() == Expression::Kind::kConstant) { + auto constExprValue = static_cast(inExpr->right())->value(); + std::vector values; + if (constExprValue.isList()) { + values = constExprValue.getList().values; + } else if (constExprValue.isSet()) { + auto setValues = constExprValue.getSet().values; + values = std::vector{std::make_move_iterator(setValues.begin()), + std::make_move_iterator(setValues.end())}; + } else { + return const_cast(expr); + } + std::vector operands; + for (const auto &v : values) { + operands.emplace_back( + RelationalExpression::makeEQ(pool, inExpr->left(), ConstantExpression::make(pool, v))); + } + if (operands.size() == 1) { + return operands[0]; + } + orExpr->setOperands(operands); } - auto orExpr = LogicalExpression::makeOr(pool); - orExpr->setOperands(orExprOperands); return orExpr; } diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index a3a9500cb97..2ea3be001ce 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -446,10 +446,11 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, // TODO(Aiee) extract the type cast logic as a method if we decide to support // more cross-type comparisons. + auto propType = SchemaUtil::propTypeToValueType(type); // Allow different numeric type to compare - if (graph::SchemaUtil::propTypeToValueType(type) == Value::Type::FLOAT && v.isInt()) { + if (propType == Value::Type::FLOAT && v.isInt()) { return ConstantExpression::make(pool, v.toFloat()); - } else if (graph::SchemaUtil::propTypeToValueType(type) == Value::Type::INT && v.isFloat()) { + } else if (propType == Value::Type::INT && v.isFloat()) { // col1 < 10.5 range: [min, 11), col1 < 10 range: [min, 10) double f = v.getFloat(); int iCeil = ceil(f); @@ -462,10 +463,34 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, return ConstantExpression::make(pool, iCeil); } return ConstantExpression::make(pool, iFloor); + } else if (kind == ExprKind::kRelIn || kind == ExprKind::kRelNotIn) { + if (v.isList()) { + auto items = v.getList().values; + for (const auto& item : items) { + if (item.type() != propType) { + std::stringstream ss; + ss << "Column type of " << prop.c_str() << " mismatched. Expected " << propType + << " but was " << item.type(); + return Status::SemanticError(ss.str()); + } + } + return ConstantExpression::make(pool, std::move(v)); + } else if (v.isSet()) { + auto items = v.getSet().values; + for (const auto& item : items) { + if (item.type() != propType) { + std::stringstream ss; + ss << "Column type of " << prop.c_str() << " mismatched. Expected " << propType + << " but was " << item.type(); + return Status::SemanticError(ss.str()); + } + } + return ConstantExpression::make(pool, std::move(v)); + } } // Check prop type - if (v.type() != SchemaUtil::propTypeToValueType(type)) { + if (v.type() != propType) { // allow different types in the IN expression, such as "abc" IN ["abc"] if (!expr->isContainerExpr()) { return Status::SemanticError("Column type error : %s", prop.c_str()); diff --git a/src/graph/visitor/FoldConstantExprVisitor.cpp b/src/graph/visitor/FoldConstantExprVisitor.cpp index 16633288665..e0cf8a2a375 100644 --- a/src/graph/visitor/FoldConstantExprVisitor.cpp +++ b/src/graph/visitor/FoldConstantExprVisitor.cpp @@ -61,6 +61,27 @@ void FoldConstantExprVisitor::visit(ArithmeticExpression *expr) { void FoldConstantExprVisitor::visit(RelationalExpression *expr) { visitBinaryExpr(expr); + if (expr->kind() == Expression::Kind::kRelIn) { + auto rhs = static_cast(expr)->right(); + if (rhs->kind() == Expression::Kind::kConstant) { + auto ce = static_cast(rhs); + auto v = ce->value(); + // Convert to more efficient C++ data structure based on the size of the container expression + if (v.isList()) { + auto &list = v.getList().values; + if (list.size() >= 16) { + ce->setValue(Set(std::unordered_set{std::make_move_iterator(list.begin()), + std::make_move_iterator(list.end())})); + } + } else if (v.isSet()) { + auto &set = v.getSet().values; + if (set.size() < 16) { + ce->setValue(List(std::vector{std::make_move_iterator(set.begin()), + std::make_move_iterator(set.end())})); + } + } + } + } } void FoldConstantExprVisitor::visit(SubscriptExpression *expr) { @@ -348,8 +369,7 @@ void FoldConstantExprVisitor::visitBinaryExpr(BinaryExpression *expr) { } Expression *FoldConstantExprVisitor::fold(Expression *expr) { - // Container expression should remain the same type after being folded - if (expr->isContainerExpr() || !status_.ok()) { + if (!status_.ok()) { return expr; } diff --git a/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp b/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp index ee5d27b69ab..57e5e0abd4c 100644 --- a/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp +++ b/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp @@ -215,5 +215,59 @@ TEST_F(FoldConstantExprVisitorTest, TestPathBuild) { expected->add(ConstantExpression::make(pool, "TOM")); ASSERT_EQ(*expr, *expected); } + +TEST_F(FoldConstantExprVisitorTest, TestRelIn) { + auto items = ExpressionList::make(pool); + items->add(ArithmeticExpression::makeAdd( + pool, ConstantExpression::make(pool, 1), ConstantExpression::make(pool, 2))); + items->add(ConstantExpression::make(pool, 4)); + items->add(ConstantExpression::make(pool, false)); + items->add(ConstantExpression::make(pool, "3")); + items->add(ConstantExpression::make(pool, 5)); + items->add(ConstantExpression::make(pool, 3)); + auto lhs = ConstantExpression::make(pool, 3); + auto listExpr = ListExpression::make(pool, items); + auto inListExpr = RelationalExpression::makeIn(pool, lhs, listExpr); + FoldConstantExprVisitor foldVisitor(pool); + inListExpr->accept(&foldVisitor); + + auto expectInListExpr = RelationalExpression::makeIn( + pool, lhs, ConstantExpression::make(pool, List(std::vector{3, 4, false, "3", 5, 3}))); + EXPECT_EQ(*inListExpr, *expectInListExpr); + auto setExpr = SetExpression::make(pool, items); + auto inSetExpr = RelationalExpression::makeIn(pool, lhs, setExpr); + inSetExpr->accept(&foldVisitor); + auto expectInSetExpr = RelationalExpression::makeIn( + pool, lhs, ConstantExpression::make(pool, List(std::vector{5, "3", false, 4, 3}))); + EXPECT_EQ(*inSetExpr, *expectInSetExpr); + + for (int i = 10u; i < 20; ++i) { + items->add(ConstantExpression::make(pool, i)); + } + + auto bigListExpr = ListExpression::make(pool, items); + auto inBigListExpr = RelationalExpression::makeIn(pool, lhs, bigListExpr); + + inBigListExpr->accept(&foldVisitor); + + auto expectInBigListExpr = RelationalExpression::makeIn( + pool, + lhs, + ConstantExpression::make(pool, + Set(std::unordered_set{ + 19, 18, 3, 16, 4, 17, false, 13, "3", 5, 10, 11, 12, 14, 15}))); + EXPECT_EQ(*inBigListExpr, *expectInBigListExpr); + auto bigSetExpr = SetExpression::make(pool, items); + auto inBigSetExpr = RelationalExpression::makeIn(pool, lhs, bigSetExpr); + inBigSetExpr->accept(&foldVisitor); + auto expectInBigSetExpr = RelationalExpression::makeIn( + pool, + lhs, + ConstantExpression::make( + pool, + List(std::vector{19, 18, 16, 15, 14, 12, 11, 10, 5, 13, "3", 17, false, 4, 3}))); + EXPECT_EQ(*inBigSetExpr, *expectInBigSetExpr); +} + } // namespace graph } // namespace nebula