From 2fd67bcd35cd275d743d31c5e681fba69602bd12 Mon Sep 17 00:00:00 2001 From: Sophie <84560950+Sophie-Xie@users.noreply.github.com> Date: Mon, 17 Jan 2022 11:54:56 +0800 Subject: [PATCH] Cherry pick to v3.0.0 (0110-0114) (#3730) * add LogMonitor to check log disk freeBytes and change log level when space is almost full (#3576) * add LogMonitor to check the log disk is full * fix comments * add comments for default flags Co-authored-by: yaphet <4414314+darionyaphet@users.noreply.github.com> * Ldbc test cases. (#3537) * Add ldbc test/ * Add all cases. * Fix some test cases. * Fix ldbc cases. * Fix pytest. Co-authored-by: Yee <2520865+yixinglu@users.noreply.github.com> * fix issue 3675 (#3678) * Fix expression rewrite (#3614) * Do not transfer the filter expression if it contains 2 lableAttribute exprs * Fix expression overflow check * Fix rewriteRelExpr * Refactor rewriteRelExpr * Fix log usage * Check whether the minus expression contains string * Add tck cases * Address comments * Fix conflicts * Address comments * modify metrics in conf files * Address comments * fix divide zone should be failed if two zones use the same host (#3699) * Support more check about merge zone arguments (#3703) * fix match index (#3694) * check scheam * add test case * fix graph crash * address comment' ' * add test case * fix error * fix vid select error * fix unit test error * address comment * fix issue 3601 (#3666) Co-authored-by: Nivras <12605142+Nivras@users.noreply.github.com> Co-authored-by: yaphet <4414314+darionyaphet@users.noreply.github.com> Co-authored-by: cpw <13495049+CPWstatic@users.noreply.github.com> Co-authored-by: Yee <2520865+yixinglu@users.noreply.github.com> Co-authored-by: Yichen Wang <18348405+Aiee@users.noreply.github.com> Co-authored-by: jimingquan Co-authored-by: hs.zhang <22708345+cangfengzhs@users.noreply.github.com> --- .github/workflows/pull_request.yml | 5 + conf/nebula-graphd.conf.default | 5 +- conf/nebula-graphd.conf.production | 5 +- src/clients/meta/MetaClient.cpp | 42 +- src/clients/meta/MetaClient.h | 8 +- src/codec/RowWriterV2.cpp | 6 +- src/codec/test/ResultSchemaProvider.cpp | 6 +- src/codec/test/ResultSchemaProvider.h | 6 +- src/codec/test/SchemaWriter.cpp | 10 +- src/common/CMakeLists.txt | 1 + src/common/graph/Response.h | 2 + src/common/log/CMakeLists.txt | 8 + src/common/log/LogMonitor.cpp | 69 +++ src/common/log/LogMonitor.h | 43 ++ src/common/meta/NebulaSchemaProvider.cpp | 4 +- src/common/meta/NebulaSchemaProvider.h | 8 +- src/common/meta/SchemaProviderIf.h | 2 +- src/common/utils/IndexKeyUtils.cpp | 5 +- src/common/utils/test/CMakeLists.txt | 15 + src/daemons/CMakeLists.txt | 1 + src/graph/optimizer/rule/IndexScanRule.cpp | 4 +- src/graph/planner/match/LabelIndexSeek.cpp | 11 +- src/graph/planner/match/VertexIdSeek.cpp | 4 + src/graph/util/ExpressionUtils.cpp | 106 +++- src/graph/util/SchemaUtil.cpp | 8 +- src/graph/util/SchemaUtil.h | 2 +- src/graph/validator/MaintainValidator.cpp | 6 +- src/graph/validator/MutateValidator.cpp | 9 +- src/graph/visitor/DeduceTypeVisitor.cpp | 61 +- src/graph/visitor/VidExtractVisitor.cpp | 52 +- .../visitor/test/FilterTransformTest.cpp | 73 ++- .../test/RewriteRelExprVisitorTest.cpp | 18 + src/interface/common.thrift | 2 + .../processors/parts/CreateSpaceProcessor.cpp | 21 +- .../processors/parts/CreateSpaceProcessor.h | 5 +- .../processors/zone/DivideZoneProcessor.cpp | 9 + .../processors/zone/MergeZoneProcessor.cpp | 11 + src/meta/test/MetaClientTest.cpp | 27 + src/meta/test/ProcessorTest.cpp | 46 +- src/mock/MockData.cpp | 91 ++- src/storage/StorageServer.cpp | 3 + src/storage/StorageServer.h | 3 + src/storage/exec/IndexScanNode.h | 2 +- src/storage/exec/QueryUtils.h | 4 +- src/storage/exec/UpdateNode.h | 4 +- src/storage/test/IndexWithTTLTest.cpp | 7 +- .../ChainAddEdgesLocalProcessor.cpp | 4 +- tests/Makefile | 7 +- tests/common/utils.py | 5 +- tests/conftest.py | 5 + tests/data/ldbc_v0_3_3/config.yaml | 41 ++ tests/tck/conftest.py | 3 + tests/tck/features/match/Base.IntVid.feature | 14 + tests/tck/features/match/Base.feature | 14 + tests/tck/features/match/SeekById.feature | 106 +++- .../features/match/SeekById.intVid.feature | 104 +++- .../features/optimizer/IndexScanRule.feature | 7 +- .../Read.feature | 558 ++++++++++++++++++ .../interactive_workload/ComplexReads.feature | 349 +++++++++++ .../interactive_workload/ShortReads.feature | 131 ++++ tests/tck/steps/test_ldbc.py | 7 + 61 files changed, 1927 insertions(+), 268 deletions(-) create mode 100644 src/common/log/CMakeLists.txt create mode 100644 src/common/log/LogMonitor.cpp create mode 100644 src/common/log/LogMonitor.h create mode 100644 tests/data/ldbc_v0_3_3/config.yaml create mode 100644 tests/tck/ldbc/business_intelligence_workload/Read.feature create mode 100644 tests/tck/ldbc/interactive_workload/ComplexReads.feature create mode 100644 tests/tck/ldbc/interactive_workload/ShortReads.feature create mode 100644 tests/tck/steps/test_ldbc.py diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index fb614a7b022..974509d8d35 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -170,6 +170,11 @@ jobs: make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} tck working-directory: tests/ timeout-minutes: 60 + - name: LDBC + run: | + make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} ldbc + working-directory: tests/ + timeout-minutes: 60 - name: Down cluster run: | make RM_DIR=false down diff --git a/conf/nebula-graphd.conf.default b/conf/nebula-graphd.conf.default index 65d9d3604fb..dacf79fc9a7 100644 --- a/conf/nebula-graphd.conf.default +++ b/conf/nebula-graphd.conf.default @@ -84,8 +84,9 @@ # System memory high watermark ratio, cancel the memory checking when the ratio greater than 1.0 --system_memory_high_watermark_ratio=0.8 +########## metrics ########## +--enable_space_level_metrics=false + ########## experimental feature ########## # if use experimental features --enable_experimental_feature=false - ---enable_space_level_metrics=false diff --git a/conf/nebula-graphd.conf.production b/conf/nebula-graphd.conf.production index 0d178d03a56..aed8350b1f6 100644 --- a/conf/nebula-graphd.conf.production +++ b/conf/nebula-graphd.conf.production @@ -83,8 +83,9 @@ # System memory high watermark ratio, cancel the memory checking when the ratio greater than 1.0 --system_memory_high_watermark_ratio=0.8 +########## metrics ########## +--enable_space_level_metrics=false + ########## experimental feature ########## # if use experimental features --enable_experimental_feature=false - ---enable_space_level_metrics=false diff --git a/src/clients/meta/MetaClient.cpp b/src/clients/meta/MetaClient.cpp index db75a884c8f..7c50f8ab120 100644 --- a/src/clients/meta/MetaClient.cpp +++ b/src/clients/meta/MetaClient.cpp @@ -364,8 +364,8 @@ bool MetaClient::loadData() { GraphSpaceID spaceId = spaceInfo.first; std::shared_ptr info = spaceInfo.second; std::shared_ptr infoDeepCopy = std::make_shared(*info); - infoDeepCopy->tagSchemas_ = buildTagSchemas(infoDeepCopy->tagItemVec_, &infoDeepCopy->pool_); - infoDeepCopy->edgeSchemas_ = buildEdgeSchemas(infoDeepCopy->edgeItemVec_, &infoDeepCopy->pool_); + infoDeepCopy->tagSchemas_ = buildTagSchemas(infoDeepCopy->tagItemVec_); + infoDeepCopy->edgeSchemas_ = buildEdgeSchemas(infoDeepCopy->edgeItemVec_); infoDeepCopy->tagIndexes_ = buildIndexes(infoDeepCopy->tagIndexItemVec_); infoDeepCopy->edgeIndexes_ = buildIndexes(infoDeepCopy->edgeIndexItemVec_); newMetaData->localCache_[spaceId] = infoDeepCopy; @@ -396,14 +396,14 @@ bool MetaClient::loadData() { return true; } -TagSchemas MetaClient::buildTagSchemas(std::vector tagItemVec, ObjectPool* pool) { +TagSchemas MetaClient::buildTagSchemas(std::vector tagItemVec) { TagSchemas tagSchemas; TagID lastTagId = -1; for (auto& tagIt : tagItemVec) { // meta will return the different version from new to old auto schema = std::make_shared(tagIt.get_version()); for (const auto& colIt : tagIt.get_schema().get_columns()) { - addSchemaField(schema.get(), colIt, pool); + addSchemaField(schema.get(), colIt); } // handle schema property schema->setProp(tagIt.get_schema().get_schema_prop()); @@ -417,8 +417,7 @@ TagSchemas MetaClient::buildTagSchemas(std::vector tagItemVec, Ob return tagSchemas; } -EdgeSchemas MetaClient::buildEdgeSchemas(std::vector edgeItemVec, - ObjectPool* pool) { +EdgeSchemas MetaClient::buildEdgeSchemas(std::vector edgeItemVec) { EdgeSchemas edgeSchemas; std::unordered_set> edges; EdgeType lastEdgeType = -1; @@ -426,7 +425,7 @@ EdgeSchemas MetaClient::buildEdgeSchemas(std::vector edgeItemVec // meta will return the different version from new to old auto schema = std::make_shared(edgeIt.get_version()); for (const auto& col : edgeIt.get_schema().get_columns()) { - MetaClient::addSchemaField(schema.get(), col, pool); + MetaClient::addSchemaField(schema.get(), col); } // handle shcem property schema->setProp(edgeIt.get_schema().get_schema_prop()); @@ -440,32 +439,19 @@ EdgeSchemas MetaClient::buildEdgeSchemas(std::vector edgeItemVec return edgeSchemas; } -void MetaClient::addSchemaField(NebulaSchemaProvider* schema, - const cpp2::ColumnDef& col, - ObjectPool* pool) { +void MetaClient::addSchemaField(NebulaSchemaProvider* schema, const cpp2::ColumnDef& col) { bool hasDef = col.default_value_ref().has_value(); auto& colType = col.get_type(); size_t len = colType.type_length_ref().has_value() ? *colType.get_type_length() : 0; cpp2::GeoShape geoShape = colType.geo_shape_ref().has_value() ? *colType.get_geo_shape() : cpp2::GeoShape::ANY; bool nullable = col.nullable_ref().has_value() ? *col.get_nullable() : false; - Expression* defaultValueExpr = nullptr; + std::string encoded; if (hasDef) { - auto encoded = *col.get_default_value(); - defaultValueExpr = Expression::decode(pool, folly::StringPiece(encoded.data(), encoded.size())); - - if (defaultValueExpr == nullptr) { - LOG(ERROR) << "Wrong expr default value for column name: " << col.get_name(); - hasDef = false; - } + encoded = *col.get_default_value(); } - schema->addField(col.get_name(), - colType.get_type(), - len, - nullable, - hasDef ? defaultValueExpr : nullptr, - geoShape); + schema->addField(col.get_name(), colType.get_type(), len, nullable, encoded, geoShape); } bool MetaClient::loadSchemas(GraphSpaceID spaceId, @@ -493,9 +479,9 @@ bool MetaClient::loadSchemas(GraphSpaceID spaceId, auto edgeItemVec = edgeRet.value(); allEdgeMap[spaceId] = {}; spaceInfoCache->tagItemVec_ = tagItemVec; - spaceInfoCache->tagSchemas_ = buildTagSchemas(tagItemVec, &spaceInfoCache->pool_); + spaceInfoCache->tagSchemas_ = buildTagSchemas(tagItemVec); spaceInfoCache->edgeItemVec_ = edgeItemVec; - spaceInfoCache->edgeSchemas_ = buildEdgeSchemas(edgeItemVec, &spaceInfoCache->pool_); + spaceInfoCache->edgeSchemas_ = buildEdgeSchemas(edgeItemVec); for (auto& tagIt : tagItemVec) { tagNameIdMap.emplace(std::make_pair(spaceId, tagIt.get_tag_name()), tagIt.get_tag_id()); @@ -859,6 +845,10 @@ Status MetaClient::handleResponse(const RESP& resp) { return Status::Error("Invalid param!"); case nebula::cpp2::ErrorCode::E_WRONGCLUSTER: return Status::Error("Wrong cluster!"); + case nebula::cpp2::ErrorCode::E_ZONE_NOT_ENOUGH: + return Status::Error("Zone not enough!"); + case nebula::cpp2::ErrorCode::E_ZONE_IS_EMPTY: + return Status::Error("Zone is empty!"); case nebula::cpp2::ErrorCode::E_STORE_FAILURE: return Status::Error("Store failure!"); case nebula::cpp2::ErrorCode::E_STORE_SEGMENT_ILLEGAL: diff --git a/src/clients/meta/MetaClient.h b/src/clients/meta/MetaClient.h index 8ac7e42ff02..3504898aba1 100644 --- a/src/clients/meta/MetaClient.h +++ b/src/clients/meta/MetaClient.h @@ -91,8 +91,6 @@ struct SpaceInfoCache { std::vector edgeIndexItemVec_; Indexes edgeIndexes_; Listeners listeners_; - // objPool used to decode when adding field - ObjectPool pool_; std::unordered_map termOfPartition_; SpaceInfoCache() = default; @@ -816,10 +814,10 @@ class MetaClient { ServiceClientsList serviceClientList_; }; - void addSchemaField(NebulaSchemaProvider* schema, const cpp2::ColumnDef& col, ObjectPool* pool); + void addSchemaField(NebulaSchemaProvider* schema, const cpp2::ColumnDef& col); - TagSchemas buildTagSchemas(std::vector tagItemVec, ObjectPool* pool); - EdgeSchemas buildEdgeSchemas(std::vector edgeItemVec, ObjectPool* pool); + TagSchemas buildTagSchemas(std::vector tagItemVec); + EdgeSchemas buildEdgeSchemas(std::vector edgeItemVec); std::unique_ptr bgThread_; SpaceNameIdMap spaceIndexByName_; diff --git a/src/codec/RowWriterV2.cpp b/src/codec/RowWriterV2.cpp index 639b4854965..9e0818521e5 100644 --- a/src/codec/RowWriterV2.cpp +++ b/src/codec/RowWriterV2.cpp @@ -815,7 +815,9 @@ WriteResult RowWriterV2::checkUnsetFields() noexcept { WriteResult r = WriteResult::SUCCEEDED; if (field->hasDefault()) { - auto expr = field->defaultValue()->clone(); + ObjectPool pool; + auto& exprStr = field->defaultValue(); + auto expr = Expression::decode(&pool, folly::StringPiece(exprStr.data(), exprStr.size())); auto defVal = Expression::eval(expr, expCtx); switch (defVal.type()) { case Value::Type::NULLVALUE: @@ -851,7 +853,7 @@ WriteResult RowWriterV2::checkUnsetFields() noexcept { default: LOG(FATAL) << "Unsupported default value type: " << defVal.typeName() << ", default value: " << defVal - << ", default value expr: " << field->defaultValue()->toString(); + << ", default value expr: " << field->defaultValue(); } } else { // Set NULL diff --git a/src/codec/test/ResultSchemaProvider.cpp b/src/codec/test/ResultSchemaProvider.cpp index 7d1037dca1b..20acd6dd071 100644 --- a/src/codec/test/ResultSchemaProvider.cpp +++ b/src/codec/test/ResultSchemaProvider.cpp @@ -22,7 +22,7 @@ ResultSchemaProvider::ResultSchemaField::ResultSchemaField(std::string name, bool nullable, int32_t offset, size_t nullFlagPos, - Expression* defaultValue, + std::string defaultValue, meta::cpp2::GeoShape geoShape) : name_(std::move(name)), type_(type), @@ -42,14 +42,14 @@ PropertyType ResultSchemaProvider::ResultSchemaField::type() const { } bool ResultSchemaProvider::ResultSchemaField::hasDefault() const { - return defaultValue_ != nullptr; + return defaultValue_ != ""; } bool ResultSchemaProvider::ResultSchemaField::nullable() const { return nullable_; } -Expression* ResultSchemaProvider::ResultSchemaField::defaultValue() const { +const std::string& ResultSchemaProvider::ResultSchemaField::defaultValue() const { return defaultValue_; } diff --git a/src/codec/test/ResultSchemaProvider.h b/src/codec/test/ResultSchemaProvider.h index c0c043f6cd4..c780cf34eb5 100644 --- a/src/codec/test/ResultSchemaProvider.h +++ b/src/codec/test/ResultSchemaProvider.h @@ -21,14 +21,14 @@ class ResultSchemaProvider : public meta::SchemaProviderIf { bool nullable, int32_t offset, size_t nullFlagPos, - Expression* defaultValue = nullptr, + std::string defaultValue = "", meta::cpp2::GeoShape = meta::cpp2::GeoShape::ANY); const char* name() const override; nebula::cpp2::PropertyType type() const override; bool nullable() const override; bool hasDefault() const override; - Expression* defaultValue() const override; + const std::string& defaultValue() const override; size_t size() const override; size_t offset() const override; size_t nullFlagPos() const override; @@ -41,7 +41,7 @@ class ResultSchemaProvider : public meta::SchemaProviderIf { bool nullable_; int32_t offset_; size_t nullFlagPos_; - Expression* defaultValue_; + std::string defaultValue_; meta::cpp2::GeoShape geoShape_; }; diff --git a/src/codec/test/SchemaWriter.cpp b/src/codec/test/SchemaWriter.cpp index 9731319c8fd..d5c4ddf76bf 100644 --- a/src/codec/test/SchemaWriter.cpp +++ b/src/codec/test/SchemaWriter.cpp @@ -90,8 +90,14 @@ SchemaWriter& SchemaWriter::appendCol(folly::StringPiece name, nullFlagPos = numNullableFields_++; } - columns_.emplace_back( - name.toString(), type, size, nullable, offset, nullFlagPos, defaultValue, geoShape); + columns_.emplace_back(name.toString(), + type, + size, + nullable, + offset, + nullFlagPos, + defaultValue ? defaultValue->encode() : "", + geoShape); nameIndex_.emplace(std::make_pair(hash, columns_.size() - 1)); return *this; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index a3470a8baa0..b0abdc68f49 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -28,3 +28,4 @@ nebula_add_subdirectory(ssl) nebula_add_subdirectory(geo) nebula_add_subdirectory(memory) nebula_add_subdirectory(id) +nebula_add_subdirectory(log) diff --git a/src/common/graph/Response.h b/src/common/graph/Response.h index dab1d00dbf3..ee7c0165bdd 100644 --- a/src/common/graph/Response.h +++ b/src/common/graph/Response.h @@ -75,6 +75,8 @@ X(E_CONFLICT, -2008) \ X(E_INVALID_PARM, -2009) \ X(E_WRONGCLUSTER, -2010) \ + X(E_ZONE_NOT_ENOUGH, -2011) \ + X(E_ZONE_IS_EMPTY, -2012) \ \ X(E_STORE_FAILURE, -2021) \ X(E_STORE_SEGMENT_ILLEGAL, -2022) \ diff --git a/src/common/log/CMakeLists.txt b/src/common/log/CMakeLists.txt new file mode 100644 index 00000000000..633a091d1eb --- /dev/null +++ b/src/common/log/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. + +nebula_add_library( + log_monitor_obj OBJECT + LogMonitor.cpp +) diff --git a/src/common/log/LogMonitor.cpp b/src/common/log/LogMonitor.cpp new file mode 100644 index 00000000000..30f5f995af1 --- /dev/null +++ b/src/common/log/LogMonitor.cpp @@ -0,0 +1,69 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#include "common/log/LogMonitor.h" + +namespace nebula { + +// default min_warn is 256M, disk freebytes < 256M will change LOG_LEVEL to WARNING +DEFINE_uint64(log_min_reserved_bytes_to_warn, + 256 * (1UL << 20), + "if freebytes in logdir less than this, will change log level to WARN"); + +// default min_error is 64M, disk freebytes < 64M will change LOG_LEVEL to ERROR +DEFINE_uint64(log_min_reserved_bytes_to_error, + 64 * (1UL << 20), + "if freebytes in logdir less than this, will change log level to ERROR"); + +// default min_fatal is 4M, disk freebytes < 4M will change LOG_LEVEL to FATAL +DEFINE_uint64(log_min_reserved_bytes_to_fatal, + 4 * (1UL << 20), + "if freebytes in logdir less than this, will change log level to FATAL"); + +// default check log_disk interval is 10s +DEFINE_int32(log_disk_check_interval_secs, 10, "interval to check free space of log path"); + +void LogMonitor::getLogDiskFreeByte() { + boost::system::error_code ec; + auto info = boost::filesystem::space(FLAGS_log_dir, ec); + if (!ec) { + freeByte_ = info.available; + } else { + LOG(WARNING) << "Get filesystem info of logdir: " << FLAGS_log_dir << " failed"; + } +} + +void LogMonitor::checkAndChangeLogLevel() { + getLogDiskFreeByte(); + + if (FLAGS_log_min_reserved_bytes_to_fatal > FLAGS_log_min_reserved_bytes_to_error || + FLAGS_log_min_reserved_bytes_to_fatal > FLAGS_log_min_reserved_bytes_to_warn || + FLAGS_log_min_reserved_bytes_to_error > FLAGS_log_min_reserved_bytes_to_warn) { + LOG(ERROR) << "Get Invalid config in LogMonitor, the LogMonitor config should be " + << "FLAGS_log_min_reserved_bytes_to_warn >" + << "FLAGS_log_min_reserved_bytes_to_error > FLAGS_log_min_reserved_bytes_to_fatal;"; + return; + } + + if (freeByte_ < FLAGS_log_min_reserved_bytes_to_fatal) { + LOG(ERROR) << "log disk freebyte is less than " << FLAGS_log_min_reserved_bytes_to_fatal + << ", change log level to FATAL"; + FLAGS_minloglevel = google::GLOG_FATAL; + } else if (freeByte_ < FLAGS_log_min_reserved_bytes_to_error) { + LOG(ERROR) << "log disk freebyte is less than " << FLAGS_log_min_reserved_bytes_to_error + << ", change log level to ERROR"; + FLAGS_minloglevel = google::GLOG_ERROR; + } else if (freeByte_ < FLAGS_log_min_reserved_bytes_to_warn) { + LOG(ERROR) << "log disk freebyte is less than " << FLAGS_log_min_reserved_bytes_to_warn + << ", change log level to WARNING"; + FLAGS_minloglevel = google::GLOG_WARNING; + } else { + // if freeByte_ is bigger than every min_log_flag, reset the FLAGS_minloglevel to old value + if (FLAGS_minloglevel != oldMinLogLevel_) { + FLAGS_minloglevel = oldMinLogLevel_; + } + } +} + +} // namespace nebula diff --git a/src/common/log/LogMonitor.h b/src/common/log/LogMonitor.h new file mode 100644 index 00000000000..86e206e1930 --- /dev/null +++ b/src/common/log/LogMonitor.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once + +#include +#include + +#include "common/thread/GenericWorker.h" + +namespace nebula { + +DECLARE_uint64(log_min_reserved_bytes_to_warn); +DECLARE_uint64(log_min_reserved_bytes_to_error); +DECLARE_uint64(log_min_reserved_bytes_to_fatal); +DECLARE_int32(log_disk_check_interval_secs); + +class LogMonitor { + public: + LogMonitor() : oldMinLogLevel_(FLAGS_minloglevel), freeByte_(1UL << 60) { + worker_ = std::make_shared(); + CHECK(worker_->start()); + worker_->addRepeatTask( + FLAGS_log_disk_check_interval_secs * 1000, &LogMonitor::checkAndChangeLogLevel, this); + } + + ~LogMonitor() { + worker_->stop(); + worker_->wait(); + } + + void getLogDiskFreeByte(); + + void checkAndChangeLogLevel(); + + private: + int32_t oldMinLogLevel_; + std::shared_ptr worker_; + std::atomic_uint64_t freeByte_; +}; + +} // namespace nebula diff --git a/src/common/meta/NebulaSchemaProvider.cpp b/src/common/meta/NebulaSchemaProvider.cpp index 535766e5a06..a6499db99e8 100644 --- a/src/common/meta/NebulaSchemaProvider.cpp +++ b/src/common/meta/NebulaSchemaProvider.cpp @@ -98,7 +98,7 @@ void NebulaSchemaProvider::addField(folly::StringPiece name, PropertyType type, size_t fixedStrLen, bool nullable, - Expression* defaultValue, + std::string defaultValue, cpp2::GeoShape geoShape) { size_t size = fieldSize(type, fixedStrLen); @@ -116,7 +116,7 @@ void NebulaSchemaProvider::addField(folly::StringPiece name, fields_.emplace_back(name.toString(), type, nullable, - defaultValue != nullptr, + defaultValue != "", defaultValue, size, offset, diff --git a/src/common/meta/NebulaSchemaProvider.h b/src/common/meta/NebulaSchemaProvider.h index 1a30952b11f..1dc841ddee6 100644 --- a/src/common/meta/NebulaSchemaProvider.h +++ b/src/common/meta/NebulaSchemaProvider.h @@ -25,7 +25,7 @@ class NebulaSchemaProvider : public SchemaProviderIf { nebula::cpp2::PropertyType type, bool nullable, bool hasDefault, - Expression* defaultValue, + std::string defaultValue, size_t size, size_t offset, size_t nullFlagPos, @@ -56,7 +56,7 @@ class NebulaSchemaProvider : public SchemaProviderIf { return hasDefault_; } - Expression* defaultValue() const override { + const std::string& defaultValue() const override { return defaultValue_; } @@ -82,7 +82,7 @@ class NebulaSchemaProvider : public SchemaProviderIf { nebula::cpp2::PropertyType type_; bool nullable_; bool hasDefault_; - Expression* defaultValue_; + std::string defaultValue_; size_t size_; size_t offset_; size_t nullFlagPos_; @@ -113,7 +113,7 @@ class NebulaSchemaProvider : public SchemaProviderIf { nebula::cpp2::PropertyType type, size_t fixedStrLen = 0, bool nullable = false, - Expression* defaultValue = nullptr, + std::string defaultValue = "", cpp2::GeoShape geoShape = cpp2::GeoShape::ANY); static std::size_t fieldSize(nebula::cpp2::PropertyType type, std::size_t fixedStrLimit); diff --git a/src/common/meta/SchemaProviderIf.h b/src/common/meta/SchemaProviderIf.h index 294fad85f24..3120412cd88 100644 --- a/src/common/meta/SchemaProviderIf.h +++ b/src/common/meta/SchemaProviderIf.h @@ -24,7 +24,7 @@ class SchemaProviderIf { virtual nebula::cpp2::PropertyType type() const = 0; virtual bool nullable() const = 0; virtual bool hasDefault() const = 0; - virtual Expression* defaultValue() const = 0; + virtual const std::string& defaultValue() const = 0; // This method returns the number of bytes the field will occupy // when the field is persisted on the storage medium // For the variant length string, the size will return 8 diff --git a/src/common/utils/IndexKeyUtils.cpp b/src/common/utils/IndexKeyUtils.cpp index 29b34fb7913..3707d85f52e 100644 --- a/src/common/utils/IndexKeyUtils.cpp +++ b/src/common/utils/IndexKeyUtils.cpp @@ -7,6 +7,7 @@ #include +#include "common/expression/Expression.h" #include "common/geo/GeoIndex.h" #include "common/utils/DefaultValueContext.h" @@ -212,7 +213,9 @@ StatusOr IndexKeyUtils::readValueWithLatestSche(RowReader* reader, } if (field->hasDefault()) { DefaultValueContext expCtx; - auto expr = field->defaultValue()->clone(); + ObjectPool pool; + auto& exprStr = field->defaultValue(); + auto expr = Expression::decode(&pool, folly::StringPiece(exprStr.data(), exprStr.size())); return Expression::eval(expr, expCtx); } else if (field->nullable()) { return NullType::__NULL__; diff --git a/src/common/utils/test/CMakeLists.txt b/src/common/utils/test/CMakeLists.txt index 7cbc4c2b045..dcbb1ff0533 100644 --- a/src/common/utils/test/CMakeLists.txt +++ b/src/common/utils/test/CMakeLists.txt @@ -13,6 +13,11 @@ nebula_add_test( $ $ $ + $ + $ + $ + $ + $ LIBRARIES gtest ${THRIFT_LIBRARIES} @@ -33,6 +38,11 @@ nebula_add_test( $ $ $ + $ + $ + $ + $ + $ LIBRARIES gtest ${THRIFT_LIBRARIES} @@ -56,6 +66,11 @@ nebula_add_test( $ $ $ + $ + $ + $ + $ + $ LIBRARIES gtest ${THRIFT_LIBRARIES} diff --git a/src/daemons/CMakeLists.txt b/src/daemons/CMakeLists.txt index 026ca7512b9..26dce0e30b4 100644 --- a/src/daemons/CMakeLists.txt +++ b/src/daemons/CMakeLists.txt @@ -34,6 +34,7 @@ set(common_deps $ $ $ + $ ) set(storage_meta_deps diff --git a/src/graph/optimizer/rule/IndexScanRule.cpp b/src/graph/optimizer/rule/IndexScanRule.cpp index 5ad02a164f4..9287768f3af 100644 --- a/src/graph/optimizer/rule/IndexScanRule.cpp +++ b/src/graph/optimizer/rule/IndexScanRule.cpp @@ -224,11 +224,11 @@ inline bool verifyType(const Value& val) { case Value::Type::SET: case Value::Type::MAP: case Value::Type::DATASET: + case Value::Type::DURATION: case Value::Type::GEOGRAPHY: // TODO(jie) case Value::Type::PATH: { - DLOG(FATAL) << "Not supported value type " << val.type() << "for index."; return false; - } break; + } default: { return true; } diff --git a/src/graph/planner/match/LabelIndexSeek.cpp b/src/graph/planner/match/LabelIndexSeek.cpp index 5a2c66153e1..3733b1004a8 100644 --- a/src/graph/planner/match/LabelIndexSeek.cpp +++ b/src/graph/planner/match/LabelIndexSeek.cpp @@ -89,18 +89,20 @@ StatusOr LabelIndexSeek::transformNode(NodeContext* nodeCtx) { if (whereCtx && whereCtx->filter) { auto* filter = whereCtx->filter; const auto& nodeAlias = nodeCtx->info->alias; + const auto& schemaName = nodeCtx->scanInfo.schemaNames.back(); if (filter->kind() == Expression::Kind::kLogicalOr) { auto exprs = ExpressionUtils::collectAll(filter, {Expression::Kind::kLabelTagProperty}); - bool labelMatched = true; + bool matched = true; for (auto* expr : exprs) { auto tagPropExpr = static_cast(expr); - if (static_cast(tagPropExpr->label())->prop() != nodeAlias) { - labelMatched = false; + if (static_cast(tagPropExpr->label())->prop() != nodeAlias || + tagPropExpr->sym() != schemaName) { + matched = false; break; } } - if (labelMatched) { + if (matched) { auto flattenFilter = ExpressionUtils::flattenInnerLogicalExpr(filter); DCHECK_EQ(flattenFilter->kind(), Expression::Kind::kLogicalOr); auto& filterItems = static_cast(flattenFilter)->operands(); @@ -120,7 +122,6 @@ StatusOr LabelIndexSeek::transformNode(NodeContext* nodeCtx) { storage::cpp2::IndexQueryContext ctx; ctx.filter_ref() = Expression::encode(*flattenFilter); scan->setIndexQueryContext({ctx}); - whereCtx.reset(); } } } diff --git a/src/graph/planner/match/VertexIdSeek.cpp b/src/graph/planner/match/VertexIdSeek.cpp index 9d0df2a085d..4510fee81e8 100644 --- a/src/graph/planner/match/VertexIdSeek.cpp +++ b/src/graph/planner/match/VertexIdSeek.cpp @@ -42,6 +42,10 @@ bool VertexIdSeek::matchNode(NodeContext *nodeCtx) { if (vidResult.spec != VidExtractVisitor::VidPattern::Special::kInUsed) { return false; } + if (vidResult.nodes.size() > 1) { + // where id(v) == 'xxx' or id(t) == 'yyy' + return false; + } for (auto &nodeVid : vidResult.nodes) { if (nodeVid.second.kind == VidExtractVisitor::VidPattern::Vids::Kind::kIn) { if (nodeVid.first == node.alias) { diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 77cb5de29a3..415532cddd2 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -7,8 +7,10 @@ #include #include +#include #include "common/base/ObjectPool.h" +#include "common/expression/ArithmeticExpression.h" #include "common/expression/Expression.h" #include "common/expression/PropertyExpression.h" #include "common/function/AggFunctionManager.h" @@ -415,30 +417,69 @@ Expression *ExpressionUtils::reduceUnaryNotExpr(const Expression *expr) { Expression *ExpressionUtils::rewriteRelExpr(const Expression *expr) { ObjectPool *pool = expr->getObjPool(); - // Match relational expressions containing at least one arithmetic expr - auto matcher = [](const Expression *e) -> bool { - if (e->isRelExpr()) { - auto relExpr = static_cast(e); - if (isEvaluableExpr(relExpr->right())) { - return true; + QueryExpressionContext ctx(nullptr); + + auto checkArithmExpr = [&ctx](const ArithmeticExpression *arithmExpr) -> bool { + auto lExpr = const_cast(arithmExpr->left()); + auto rExpr = const_cast(arithmExpr->right()); + + // If the arithExpr has constant expr as member that is a string, do not rewrite + if (lExpr->kind() == Expression::Kind::kConstant) { + if (lExpr->eval(ctx).isStr()) { + return false; } - // TODO: To match arithmetic expression on both side - auto lExpr = relExpr->left(); - if (lExpr->isArithmeticExpr()) { - auto arithmExpr = static_cast(lExpr); - return isEvaluableExpr(arithmExpr->left()) || isEvaluableExpr(arithmExpr->right()); + } + if (rExpr->kind() == Expression::Kind::kConstant) { + if (rExpr->eval(ctx).isStr()) { + return false; } } - return false; + return isEvaluableExpr(arithmExpr->left()) || isEvaluableExpr(arithmExpr->right()); + }; + + // Match relational expressions following these rules: + // 1. the right operand of rel expr should be evaluable + // 2. the left operand of rel expr should be: + // 2.a an arithmetic expr that does not contains string and has at least one operand that is + // evaluable + // OR + // 2.b an relational expr so that it might could be simplified: + // ((v.age > 40 == true) => (v.age > 40)) + auto matcher = [&checkArithmExpr](const Expression *e) -> bool { + if (!e->isRelExpr()) { + return false; + } + + auto relExpr = static_cast(e); + // Check right operand + bool checkRightOperand = isEvaluableExpr(relExpr->right()); + if (!checkRightOperand) { + return false; + } + + // Check left operand + bool checkLeftOperand = false; + auto lExpr = relExpr->left(); + // Left operand is arithmetical expr + if (lExpr->isArithmeticExpr()) { + auto arithmExpr = static_cast(lExpr); + checkLeftOperand = checkArithmExpr(arithmExpr); + } else if (lExpr->isRelExpr() || + lExpr->kind() == Expression::Kind::kLabelAttribute) { // for expressions that + // contain boolean literals + // such as (v.age <= null) + checkLeftOperand = true; + } + return checkLeftOperand; }; // Simplify relational expressions involving boolean literals - auto simplifyBoolOperand = - [pool](RelationalExpression *relExpr, Expression *lExpr, Expression *rExpr) -> Expression * { - QueryExpressionContext ctx(nullptr); + auto simplifyBoolOperand = [pool, &ctx](RelationalExpression *relExpr, + Expression *lExpr, + Expression *rExpr) -> Expression * { if (rExpr->kind() == Expression::Kind::kConstant) { auto conExpr = static_cast(rExpr); - auto val = conExpr->eval(ctx(nullptr)); + auto val = conExpr->eval(ctx); auto valType = val.type(); // Rewrite to null if the expression contains any operand that is null if (valType == Value::Type::NULLVALUE) { @@ -521,7 +562,7 @@ Expression *ExpressionUtils::rewriteRelExprHelper(const Expression *expr, // case Expression::Kind::kMultiply: // case Expression::Kind::kDivision: default: - LOG(FATAL) << "Unsupported expression kind: " << static_cast(kind); + DLOG(ERROR) << "Unsupported expression kind: " << static_cast(kind); break; } @@ -529,13 +570,35 @@ Expression *ExpressionUtils::rewriteRelExprHelper(const Expression *expr, } StatusOr ExpressionUtils::filterTransform(const Expression *filter) { - auto rewrittenExpr = const_cast(filter); + // Check if any overflow happen before filter tranform + auto initialConstFold = foldConstantExpr(filter); + NG_RETURN_IF_ERROR(initialConstFold); + auto newFilter = initialConstFold.value(); + + // If the filter contains more than one different Label expr, this filter cannot be + // pushed down, such as where v1.player.name == 'xxx' or v2.player.age == 20 + auto propExprs = ExpressionUtils::collectAll(newFilter, {Expression::Kind::kLabel}); + // Deduplicate the list + std::unordered_set dedupPropExprSet; + for (auto &iter : propExprs) { + dedupPropExprSet.emplace(iter->toString()); + } + if (dedupPropExprSet.size() > 1) { + return const_cast(newFilter); + } + // Rewrite relational expression - rewrittenExpr = rewriteRelExpr(rewrittenExpr); + auto rewrittenExpr = rewriteRelExpr(newFilter->clone()); + // Fold constant expression auto constantFoldRes = foldConstantExpr(rewrittenExpr); - NG_RETURN_IF_ERROR(constantFoldRes); + // If errors like overflow happened during the constant fold, stop transferming and return the + // original expression + if (!constantFoldRes.ok()) { + return const_cast(newFilter); + } rewrittenExpr = constantFoldRes.value(); + // Reduce Unary expression rewrittenExpr = reduceUnaryNotExpr(rewrittenExpr); return rewrittenExpr; @@ -880,8 +943,9 @@ Expression::Kind ExpressionUtils::getNegatedArithmeticType(const Expression::Kin return Expression::Kind::kDivision; case Expression::Kind::kDivision: return Expression::Kind::kMultiply; + // There is no oppsite operation to Mod, return itself case Expression::Kind::kMod: - LOG(FATAL) << "Unsupported expression kind: " << static_cast(kind); + return Expression::Kind::kMod; break; default: LOG(FATAL) << "Invalid arithmetic expression kind: " << static_cast(kind); diff --git a/src/graph/util/SchemaUtil.cpp b/src/graph/util/SchemaUtil.cpp index d5b5e5e3934..37ac63e4053 100644 --- a/src/graph/util/SchemaUtil.cpp +++ b/src/graph/util/SchemaUtil.cpp @@ -55,19 +55,19 @@ Status SchemaUtil::validateProps(const std::vector &schemaProp // static std::shared_ptr SchemaUtil::generateSchemaProvider( - ObjectPool *pool, const SchemaVer ver, const meta::cpp2::Schema &schema) { + const SchemaVer ver, const meta::cpp2::Schema &schema) { auto schemaPtr = std::make_shared(ver); for (auto col : schema.get_columns()) { bool hasDef = col.default_value_ref().has_value(); - Expression *defaultValueExpr = nullptr; + std::string exprStr; if (hasDef) { - defaultValueExpr = Expression::decode(pool, *col.default_value_ref()); + exprStr = *col.default_value_ref(); } schemaPtr->addField(col.get_name(), col.get_type().get_type(), col.type.type_length_ref().value_or(0), col.nullable_ref().value_or(false), - hasDef ? defaultValueExpr : nullptr, + exprStr, col.type.geo_shape_ref().value_or(meta::cpp2::GeoShape::ANY)); } return schemaPtr; diff --git a/src/graph/util/SchemaUtil.h b/src/graph/util/SchemaUtil.h index b32e461d130..a93755eb444 100644 --- a/src/graph/util/SchemaUtil.h +++ b/src/graph/util/SchemaUtil.h @@ -29,7 +29,7 @@ class SchemaUtil final { meta::cpp2::Schema& schema); static std::shared_ptr generateSchemaProvider( - ObjectPool* pool, const SchemaVer ver, const meta::cpp2::Schema& schema); + const SchemaVer ver, const meta::cpp2::Schema& schema); static Status setTTLDuration(SchemaPropItem* schemaProp, meta::cpp2::Schema& schema); diff --git a/src/graph/validator/MaintainValidator.cpp b/src/graph/validator/MaintainValidator.cpp index b7ec3d20024..1559bd464b7 100644 --- a/src/graph/validator/MaintainValidator.cpp +++ b/src/graph/validator/MaintainValidator.cpp @@ -157,8 +157,7 @@ Status CreateTagValidator::validateImpl() { NG_RETURN_IF_ERROR(validateColumns(sentence->columnSpecs(), schema)); NG_RETURN_IF_ERROR(SchemaUtil::validateProps(sentence->getSchemaProps(), schema)); // Save the schema in validateContext - auto pool = qctx_->objPool(); - auto schemaPro = SchemaUtil::generateSchemaProvider(pool, 0, schema); + auto schemaPro = SchemaUtil::generateSchemaProvider(0, schema); vctx_->addSchema(name, schemaPro); createCtx_->name = std::move(name); createCtx_->schema = std::move(schema); @@ -180,8 +179,7 @@ Status CreateEdgeValidator::validateImpl() { NG_RETURN_IF_ERROR(validateColumns(sentence->columnSpecs(), schema)); NG_RETURN_IF_ERROR(SchemaUtil::validateProps(sentence->getSchemaProps(), schema)); // Save the schema in validateContext - auto pool = qctx_->objPool(); - auto schemaPro = SchemaUtil::generateSchemaProvider(pool, 0, schema); + auto schemaPro = SchemaUtil::generateSchemaProvider(0, schema); vctx_->addSchema(name, schemaPro); createCtx_->name = std::move(name); createCtx_->schema = std::move(schema); diff --git a/src/graph/validator/MutateValidator.cpp b/src/graph/validator/MutateValidator.cpp index ee32fc00f44..c41e2c03a5e 100644 --- a/src/graph/validator/MutateValidator.cpp +++ b/src/graph/validator/MutateValidator.cpp @@ -251,9 +251,12 @@ Status InsertEdgesValidator::prepareEdges() { auto iter = std::find(propNames_.begin(), propNames_.end(), propName); if (iter == propNames_.end()) { if (field->hasDefault()) { - auto *defaultValue = field->defaultValue(); - DCHECK(!!defaultValue); - auto v = defaultValue->eval(QueryExpressionContext()(nullptr)); + auto &defaultValue = field->defaultValue(); + DCHECK(!defaultValue.empty()); + ObjectPool pool; + auto expr = Expression::decode( + &pool, folly::StringPiece(defaultValue.data(), defaultValue.size())); + auto v = expr->eval(QueryExpressionContext()(nullptr)); entirePropValues.emplace_back(v); } else { if (!field->nullable()) { diff --git a/src/graph/visitor/DeduceTypeVisitor.cpp b/src/graph/visitor/DeduceTypeVisitor.cpp index abb6e0c2539..4adb20079d3 100644 --- a/src/graph/visitor/DeduceTypeVisitor.cpp +++ b/src/graph/visitor/DeduceTypeVisitor.cpp @@ -44,33 +44,40 @@ static const std::unordered_map kConstantValues = { {Value::Type::DURATION, Value(Duration())}, }; -#define DETECT_BIEXPR_TYPE(OP) \ - expr->left()->accept(this); \ - if (!ok()) return; \ - auto left = type_; \ - expr->right()->accept(this); \ - if (!ok()) return; \ - auto right = type_; \ - auto lhs = kConstantValues.find(left); \ - if (lhs == kConstantValues.end()) { \ - status_ = Status::SemanticError("Can't find constant value of `%s' when deduce type.", \ - Value::toString(left).c_str()); \ - return; \ - } \ - auto rhs = kConstantValues.find(right); \ - if (rhs == kConstantValues.end()) { \ - status_ = Status::SemanticError("Can't find constant value of `%s' when deduce type.", \ - Value::toString(right).c_str()); \ - return; \ - } \ - auto detectVal = lhs->second OP rhs->second; \ - if (detectVal.isBadNull()) { \ - std::stringstream ss; \ - ss << "`" << expr->toString() << "' is not a valid expression, " \ - << "can not apply `" << #OP << "' to `" << left << "' and `" << right << "'."; \ - status_ = Status::SemanticError(ss.str()); \ - return; \ - } \ +#define DETECT_BIEXPR_TYPE(OP) \ + expr->left()->accept(this); \ + if (!ok()) return; \ + auto left = type_; \ + expr->right()->accept(this); \ + if (!ok()) return; \ + auto right = type_; \ + if (strcmp(#OP, "-") == 0 && (left == Value::Type::STRING || right == Value::Type::STRING)) { \ + std::stringstream ss; \ + ss << "`" << expr->toString() << "' is not a valid expression, " \ + << "can not apply `" << #OP << "' to `" << left << "' and `" << right << "'."; \ + status_ = Status::SemanticError(ss.str()); \ + return; \ + } \ + auto lhs = kConstantValues.find(left); \ + if (lhs == kConstantValues.end()) { \ + status_ = Status::SemanticError("Can't find constant value of `%s' when deduce type.", \ + Value::toString(left).c_str()); \ + return; \ + } \ + auto rhs = kConstantValues.find(right); \ + if (rhs == kConstantValues.end()) { \ + status_ = Status::SemanticError("Can't find constant value of `%s' when deduce type.", \ + Value::toString(right).c_str()); \ + return; \ + } \ + auto detectVal = lhs->second OP rhs->second; \ + if (detectVal.isBadNull()) { \ + std::stringstream ss; \ + ss << "`" << expr->toString() << "' is not a valid expression, " \ + << "can not apply `" << #OP << "' to `" << left << "' and `" << right << "'."; \ + status_ = Status::SemanticError(ss.str()); \ + return; \ + } \ type_ = detectVal.type() #define DETECT_NARYEXPR_TYPE(OP) \ diff --git a/src/graph/visitor/VidExtractVisitor.cpp b/src/graph/visitor/VidExtractVisitor.cpp index 75dee22a026..5cfaee38518 100644 --- a/src/graph/visitor/VidExtractVisitor.cpp +++ b/src/graph/visitor/VidExtractVisitor.cpp @@ -85,7 +85,6 @@ void VidExtractVisitor::visit(ConstantExpression *expr) { void VidExtractVisitor::visit(UnaryExpression *expr) { if (expr->kind() == Expression::Kind::kUnaryNot) { - // const auto *expr = static_cast(expr); expr->operand()->accept(this); auto operandResult = moveVidPattern(); if (operandResult.spec == VidPattern::Special::kInUsed) { @@ -119,14 +118,15 @@ void VidExtractVisitor::visit(LabelExpression *expr) { } void VidExtractVisitor::visit(LabelAttributeExpression *expr) { - if (expr->kind() == Expression::Kind::kLabelAttribute) { - const auto *labelExpr = static_cast(expr); - vidPattern_ = - VidPattern{VidPattern::Special::kInUsed, - {{labelExpr->left()->toString(), {VidPattern::Vids::Kind::kOtherSource, {}}}}}; - } else { - vidPattern_ = VidPattern{}; - } + const auto &label = expr->left()->toString(); + vidPattern_ = VidPattern{VidPattern::Special::kInUsed, + {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; +} + +void VidExtractVisitor::visit(LabelTagPropertyExpression *expr) { + const auto &label = static_cast(expr->label())->prop(); + vidPattern_ = VidPattern{VidPattern::Special::kInUsed, + {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; } void VidExtractVisitor::visit(ArithmeticExpression *expr) { @@ -144,7 +144,13 @@ void VidExtractVisitor::visit(RelationalExpression *expr) { {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; return; } - + if (expr->left()->kind() == Expression::Kind::kLabelTagProperty) { + const auto *tagPropExpr = static_cast(expr->left()); + const auto &label = static_cast(tagPropExpr->label())->prop(); + vidPattern_ = VidPattern{VidPattern::Special::kInUsed, + {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; + return; + } if (expr->left()->kind() != Expression::Kind::kFunctionCall || expr->right()->kind() != Expression::Kind::kList || !ExpressionUtils::isEvaluableExpr(expr->right())) { @@ -165,7 +171,6 @@ void VidExtractVisitor::visit(RelationalExpression *expr) { VidPattern{VidPattern::Special::kInUsed, {{fCallExpr->args()->args().front()->toString(), {VidPattern::Vids::Kind::kIn, listExpr->eval(ctx(nullptr)).getList()}}}}; - return; } else if (expr->kind() == Expression::Kind::kRelEQ) { // id(V) == vid if (expr->left()->kind() == Expression::Kind::kLabelAttribute) { @@ -175,6 +180,13 @@ void VidExtractVisitor::visit(RelationalExpression *expr) { {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; return; } + if (expr->left()->kind() == Expression::Kind::kLabelTagProperty) { + const auto *tagPropExpr = static_cast(expr->left()); + const auto &label = static_cast(tagPropExpr->label())->prop(); + vidPattern_ = VidPattern{VidPattern::Special::kInUsed, + {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; + return; + } if (expr->left()->kind() != Expression::Kind::kFunctionCall || expr->right()->kind() != Expression::Kind::kConstant) { vidPattern_ = VidPattern{}; @@ -194,10 +206,13 @@ void VidExtractVisitor::visit(RelationalExpression *expr) { vidPattern_ = VidPattern{VidPattern::Special::kInUsed, {{fCallExpr->args()->args().front()->toString(), {VidPattern::Vids::Kind::kIn, List({constExpr->value()})}}}}; - return; } else { - vidPattern_ = VidPattern{}; - return; + if (ExpressionUtils::isPropertyExpr(expr->left())) { + vidPattern_ = VidPattern{VidPattern::Special::kInUsed, + {{"", {VidPattern::Vids::Kind::kOtherSource, {}}}}}; + } else { + vidPattern_ = VidPattern{}; + } } } @@ -213,11 +228,9 @@ void VidExtractVisitor::visit(AttributeExpression *expr) { void VidExtractVisitor::visit(LogicalExpression *expr) { if (expr->kind() == Expression::Kind::kLogicalAnd) { - // const auto *expr = static_cast(expr); std::vector operandsResult; operandsResult.reserve(expr->operands().size()); for (const auto &operand : expr->operands()) { - // operandsResult.emplace_back(reverseEvalVids(operand.get())); operand->accept(this); operandsResult.emplace_back(moveVidPattern()); } @@ -273,8 +286,6 @@ void VidExtractVisitor::visit(LogicalExpression *expr) { vidPattern_ = std::move(inResult); return; } else if (expr->kind() == Expression::Kind::kLogicalOr) { - // const auto *andExpr = static_cast(expr); std::vector operandsResult; operandsResult.reserve(expr->operands().size()); for (const auto &operand : expr->operands()) { @@ -351,11 +362,6 @@ void VidExtractVisitor::visit(MapExpression *expr) { } // property Expression -void VidExtractVisitor::visit(LabelTagPropertyExpression *expr) { - UNUSED(expr); - vidPattern_ = VidPattern{}; -} - void VidExtractVisitor::visit(TagPropertyExpression *expr) { UNUSED(expr); vidPattern_ = VidPattern{}; diff --git a/src/graph/visitor/test/FilterTransformTest.cpp b/src/graph/visitor/test/FilterTransformTest.cpp index 41eab24c428..733e50ba3b2 100644 --- a/src/graph/visitor/test/FilterTransformTest.cpp +++ b/src/graph/visitor/test/FilterTransformTest.cpp @@ -23,27 +23,26 @@ TEST_F(FilterTransformTest, TestComplexExprRewrite) { ASSERT_EQ(*res.value(), *expected) << res.value()->toString() << " vs. " << expected->toString(); } +// If the expression tranformation causes an overflow, it should not be done. TEST_F(FilterTransformTest, TestCalculationOverflow) { - // (v.age - 1 < 9223372036854775807) => overflow + // (v.age - 1 < 9223372036854775807) => unchanged { auto expr = ltExpr(minusExpr(laExpr("v", "age"), constantExpr(1)), constantExpr(9223372036854775807)); auto res = ExpressionUtils::filterTransform(expr); - auto expected = Status::SemanticError( - "result of (9223372036854775807+1) cannot be represented as an " - "integer"); - ASSERT(!res.status().ok()); - ASSERT_EQ(res.status(), expected) << res.status().toString() << " vs. " << expected.toString(); + ASSERT(res.ok()); + auto expected = expr; + ASSERT_EQ(res.value()->toString(), expected->toString()) + << res.value()->toString() << " vs. " << expected->toString(); } - // (v.age + 1 < -9223372036854775808) => overflow + // (v.age + 1 < -9223372036854775808) => unchanged { auto expr = ltExpr(addExpr(laExpr("v", "age"), constantExpr(1)), constantExpr(INT64_MIN)); auto res = ExpressionUtils::filterTransform(expr); - auto expected = Status::SemanticError( - "result of (-9223372036854775808-1) cannot be represented as an " - "integer"); - ASSERT(!res.status().ok()); - ASSERT_EQ(res.status(), expected) << res.status().toString() << " vs. " << expected.toString(); + ASSERT(res.ok()); + auto expected = expr; + ASSERT_EQ(res.value()->toString(), expected->toString()) + << res.value()->toString() << " vs. " << expected->toString(); } // (v.age - 1 < 9223372036854775807 + 1) => overflow { @@ -53,7 +52,7 @@ TEST_F(FilterTransformTest, TestCalculationOverflow) { auto expected = Status::SemanticError( "result of (9223372036854775807+1) cannot be represented as an " "integer"); - ASSERT(!res.status().ok()); + ASSERT(!res.ok()); ASSERT_EQ(res.status(), expected) << res.status().toString() << " vs. " << expected.toString(); } // (v.age + 1 < -9223372036854775808 - 1) => overflow @@ -64,30 +63,52 @@ TEST_F(FilterTransformTest, TestCalculationOverflow) { auto expected = Status::SemanticError( "result of (-9223372036854775808-1) cannot be represented as an " "integer"); - ASSERT(!res.status().ok()); + ASSERT(!res.ok()); ASSERT_EQ(res.status(), expected) << res.status().toString() << " vs. " << expected.toString(); } - // !!!(v.age - 1 < 9223372036854775807) => overflow + // !!!(v.age - 1 < 9223372036854775807) => unchanged { auto expr = notExpr(notExpr(notExpr(ltExpr(minusExpr(laExpr("v", "age"), constantExpr(1)), constantExpr(9223372036854775807))))); auto res = ExpressionUtils::filterTransform(expr); - auto expected = Status::SemanticError( - "result of (9223372036854775807+1) cannot be represented as an " - "integer"); - ASSERT(!res.status().ok()); - ASSERT_EQ(res.status(), expected) << res.status().toString() << " vs. " << expected.toString(); + ASSERT(res.ok()); + auto expected = expr; + ASSERT_EQ(res.value()->toString(), expected->toString()) + << res.value()->toString() << " vs. " << expected->toString(); } - // !!!(v.age + 1 < -9223372036854775808) => overflow + // !!!(v.age + 1 < -9223372036854775808) => unchanged { auto expr = notExpr(notExpr( notExpr(ltExpr(addExpr(laExpr("v", "age"), constantExpr(1)), constantExpr(INT64_MIN))))); auto res = ExpressionUtils::filterTransform(expr); - auto expected = Status::SemanticError( - "result of (-9223372036854775808-1) cannot be represented as an " - "integer"); - ASSERT(!res.status().ok()); - ASSERT_EQ(res.status(), expected) << res.status().toString() << " vs. " << expected.toString(); + ASSERT(res.ok()); + auto expected = expr; + ASSERT_EQ(res.value()->toString(), expected->toString()) + << res.value()->toString() << " vs. " << expected->toString(); + } +} + +TEST_F(FilterTransformTest, TestNoRewrite) { + // Do not rewrite if the filter contains more than one different Label expr + { + // (v.age - 1 < v2.age + 2) => Unchanged + auto expr = ltExpr(minusExpr(laExpr("v", "age"), constantExpr(1)), + addExpr(laExpr("v2", "age"), constantExpr(40))); + auto res = ExpressionUtils::filterTransform(expr); + ASSERT(res.ok()); + auto expected = expr; + ASSERT_EQ(res.value()->toString(), expected->toString()) + << res.value()->toString() << " vs. " << expected->toString(); + } + // Do not rewrite if the arithmetic expression contains string/char constant + { + // (v.name - "ab" < "name") => Unchanged + auto expr = ltExpr(minusExpr(laExpr("v", "name"), constantExpr("ab")), constantExpr("name")); + auto res = ExpressionUtils::filterTransform(expr); + ASSERT(res.ok()); + auto expected = expr; + ASSERT_EQ(res.value()->toString(), expected->toString()) + << res.value()->toString() << " vs. " << expected->toString(); } } diff --git a/src/graph/visitor/test/RewriteRelExprVisitorTest.cpp b/src/graph/visitor/test/RewriteRelExprVisitorTest.cpp index 1ca02836784..9962844fc14 100644 --- a/src/graph/visitor/test/RewriteRelExprVisitorTest.cpp +++ b/src/graph/visitor/test/RewriteRelExprVisitorTest.cpp @@ -234,5 +234,23 @@ TEST_F(RewriteRelExprVisitorTest, TestContainer) { } } +TEST_F(RewriteRelExprVisitorTest, TestArithmeticalExprWithStr) { + // Do not rewrite if the arithmetic expression contains string/char constant + { + // (v.name - "ab" < "name") => Unchanged + auto expr = ltExpr(minusExpr(laExpr("v", "name"), constantExpr("ab")), constantExpr("name")); + auto res = ExpressionUtils::rewriteRelExpr(expr); + auto expected = expr; + ASSERT_EQ(*res, *expected) << res->toString() << " vs. " << expected->toString(); + } + { + // (v.name + "ab" < "name") => Unchanged + auto expr = ltExpr(addExpr(laExpr("v", "name"), constantExpr("ab")), constantExpr("name")); + auto res = ExpressionUtils::rewriteRelExpr(expr); + auto expected = expr; + ASSERT_EQ(*res, *expected) << res->toString() << " vs. " << expected->toString(); + } +} + } // namespace graph } // namespace nebula diff --git a/src/interface/common.thrift b/src/interface/common.thrift index d21f704027a..3075f3bd051 100644 --- a/src/interface/common.thrift +++ b/src/interface/common.thrift @@ -358,6 +358,8 @@ enum ErrorCode { E_CONFLICT = -2008, E_INVALID_PARM = -2009, E_WRONGCLUSTER = -2010, + E_ZONE_NOT_ENOUGH = -2011, + E_ZONE_IS_EMPTY = -2012, E_STORE_FAILURE = -2021, E_STORE_SEGMENT_ILLEGAL = -2022, diff --git a/src/meta/processors/parts/CreateSpaceProcessor.cpp b/src/meta/processors/parts/CreateSpaceProcessor.cpp index a2b3b0cf396..40ee13b1854 100644 --- a/src/meta/processors/parts/CreateSpaceProcessor.cpp +++ b/src/meta/processors/parts/CreateSpaceProcessor.cpp @@ -146,7 +146,7 @@ void CreateSpaceProcessor::process(const cpp2::CreateSpaceReq& req) { int32_t zoneNum = zones.size(); if (replicaFactor > zoneNum) { LOG(ERROR) << "Replication number should less than or equal to zone number."; - handleErrorCode(nebula::cpp2::ErrorCode::E_INVALID_PARM); + handleErrorCode(nebula::cpp2::ErrorCode::E_ZONE_NOT_ENOUGH); onFinished(); return; } @@ -234,16 +234,16 @@ void CreateSpaceProcessor::process(const cpp2::CreateSpaceReq& req) { auto pickedZones = std::move(pickedZonesRet).value(); auto partHostsRet = pickHostsWithZone(pickedZones, zoneHosts); - if (!partHostsRet.ok()) { + if (!nebula::ok(partHostsRet)) { LOG(ERROR) << "Pick hosts with zone failed."; - code = nebula::cpp2::ErrorCode::E_INVALID_PARM; + code = nebula::error(partHostsRet); break; } - auto partHosts = std::move(partHostsRet).value(); + auto partHosts = nebula::value(partHostsRet); if (partHosts.empty()) { LOG(ERROR) << "Pick hosts is empty."; - code = nebula::cpp2::ErrorCode::E_INVALID_PARM; + code = nebula::cpp2::ErrorCode::E_NO_HOSTS; break; } @@ -268,19 +268,15 @@ void CreateSpaceProcessor::process(const cpp2::CreateSpaceReq& req) { LOG(INFO) << "Create space " << spaceName; } -StatusOr CreateSpaceProcessor::pickHostsWithZone( +ErrorOr CreateSpaceProcessor::pickHostsWithZone( const std::vector& zones, const std::unordered_map& zoneHosts) { Hosts pickedHosts; nebula::cpp2::ErrorCode code = nebula::cpp2::ErrorCode::SUCCEEDED; for (auto iter = zoneHosts.begin(); iter != zoneHosts.end(); iter++) { - if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - break; - } - if (iter->second.empty()) { LOG(ERROR) << "Zone " << iter->first << " is empty"; - code = nebula::cpp2::ErrorCode::E_INVALID_PARM; + code = nebula::cpp2::ErrorCode::E_ZONE_IS_EMPTY; break; } @@ -305,12 +301,13 @@ StatusOr CreateSpaceProcessor::pickHostsWithZone( } } + CHECK_CODE_AND_BREAK(); hostLoading_[picked] += 1; pickedHosts.emplace_back(toThriftHost(std::move(picked))); } if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - return Status::Error("Host not found"); + return code; } return pickedHosts; } diff --git a/src/meta/processors/parts/CreateSpaceProcessor.h b/src/meta/processors/parts/CreateSpaceProcessor.h index 67768039dbb..b52ab4ac958 100644 --- a/src/meta/processors/parts/CreateSpaceProcessor.h +++ b/src/meta/processors/parts/CreateSpaceProcessor.h @@ -26,8 +26,9 @@ class CreateSpaceProcessor : public BaseProcessor { : BaseProcessor(kvstore) {} // Get the host with the least load in the zone - StatusOr pickHostsWithZone(const std::vector& zones, - const std::unordered_map& zoneHosts); + ErrorOr pickHostsWithZone( + const std::vector& zones, + const std::unordered_map& zoneHosts); // Get the zones with the least load StatusOr> pickLightLoadZones(int32_t replicaFactor); diff --git a/src/meta/processors/zone/DivideZoneProcessor.cpp b/src/meta/processors/zone/DivideZoneProcessor.cpp index 3659a81e3de..6b43915a5ad 100644 --- a/src/meta/processors/zone/DivideZoneProcessor.cpp +++ b/src/meta/processors/zone/DivideZoneProcessor.cpp @@ -33,6 +33,7 @@ void DivideZoneProcessor::process(const cpp2::DivideZoneReq& req) { std::vector zoneNames; std::unordered_set totalHosts; + size_t totalHostsSize = 0; auto batchHolder = std::make_unique(); nebula::cpp2::ErrorCode code = nebula::cpp2::ErrorCode::SUCCEEDED; for (auto iter = zoneItems.begin(); iter != zoneItems.end(); iter++) { @@ -65,6 +66,7 @@ void DivideZoneProcessor::process(const cpp2::DivideZoneReq& req) { break; } + totalHostsSize += hosts.size(); std::copy(hosts.begin(), hosts.end(), std::inserter(totalHosts, totalHosts.end())); auto key = MetaKeyUtils::zoneKey(std::move(zone)); @@ -85,6 +87,13 @@ void DivideZoneProcessor::process(const cpp2::DivideZoneReq& req) { return; } + if (totalHostsSize != totalHosts.size()) { + LOG(ERROR) << "The host in zone list have duplicate element"; + handleErrorCode(nebula::cpp2::ErrorCode::E_INVALID_PARM); + onFinished(); + return; + } + for (auto& host : totalHosts) { auto iter = std::find(zoneHosts.begin(), zoneHosts.end(), host); if (iter == zoneHosts.end()) { diff --git a/src/meta/processors/zone/MergeZoneProcessor.cpp b/src/meta/processors/zone/MergeZoneProcessor.cpp index fe8f1f227bd..98c6b4b4379 100644 --- a/src/meta/processors/zone/MergeZoneProcessor.cpp +++ b/src/meta/processors/zone/MergeZoneProcessor.cpp @@ -169,6 +169,17 @@ void MergeZoneProcessor::process(const cpp2::MergeZoneReq& req) { // Write the merged zone into meta service auto key = MetaKeyUtils::zoneKey(zoneName); std::vector zoneHosts; + auto valueRet = doGet(key); + if (nebula::ok(valueRet)) { + auto it = std::find(zones.begin(), zones.end(), zoneName); + if (it == zones.end()) { + LOG(ERROR) << "The target zone should exist in merge zone list"; + handleErrorCode(nebula::cpp2::ErrorCode::E_INVALID_PARM); + onFinished(); + return; + } + } + for (auto& zone : zones) { auto zoneKey = MetaKeyUtils::zoneKey(zone); auto zoneValueRet = doGet(std::move(zoneKey)); diff --git a/src/meta/test/MetaClientTest.cpp b/src/meta/test/MetaClientTest.cpp index b5675e2d110..3c3ff5aa3ce 100644 --- a/src/meta/test/MetaClientTest.cpp +++ b/src/meta/test/MetaClientTest.cpp @@ -2278,6 +2278,13 @@ TEST(MetaClientTest, MergeZoneTest) { ASSERT_EQ("default_zone_127.0.0.1_8988", zones[2].get_zone_name()); ASSERT_EQ("default_zone_127.0.0.1_8989", zones[3].get_zone_name()); } + { + auto result = client + ->mergeZone({"default_zone_127.0.0.1_8986", "default_zone_127.0.0.1_8987"}, + "default_zone_127.0.0.1_8988") + .get(); + EXPECT_FALSE(result.ok()); + } { // Merge zones is empty auto result = client->mergeZone({}, "new_zone").get(); @@ -2479,6 +2486,26 @@ TEST(MetaClientTest, DivideZoneTest) { auto result = client->divideZone("default_zone", std::move(zoneItems)).get(); EXPECT_FALSE(result.ok()); } + { + std::unordered_map> zoneItems; + std::vector oneHosts = { + {"127.0.0.1", 8986}, {"127.0.0.1", 8987}, {"127.0.0.1", 8988}}; + zoneItems.emplace("one_zone", std::move(oneHosts)); + std::vector anotherHosts = {{"127.0.0.1", 8988}, {"127.0.0.1", 8989}}; + zoneItems.emplace("another_zone", std::move(anotherHosts)); + auto result = client->divideZone("default_zone", std::move(zoneItems)).get(); + EXPECT_FALSE(result.ok()); + } + { + std::unordered_map> zoneItems; + std::vector oneHosts = { + {"127.0.0.1", 8986}, {"127.0.0.1", 8987}, {"127.0.0.1", 8987}}; + zoneItems.emplace("one_zone", std::move(oneHosts)); + std::vector anotherHosts = {{"127.0.0.1", 8988}, {"127.0.0.1", 8989}}; + zoneItems.emplace("another_zone", std::move(anotherHosts)); + auto result = client->divideZone("default_zone", std::move(zoneItems)).get(); + EXPECT_FALSE(result.ok()); + } { std::unordered_map> zoneItems; std::vector hosts0 = {}; diff --git a/src/meta/test/ProcessorTest.cpp b/src/meta/test/ProcessorTest.cpp index 7e7f5c3edad..58bce373c42 100644 --- a/src/meta/test/ProcessorTest.cpp +++ b/src/meta/test/ProcessorTest.cpp @@ -3295,7 +3295,7 @@ TEST(ProcessorTest, DropHostsTest) { auto f = processor->getFuture(); processor->process(req); auto resp = std::move(f).get(); - ASSERT_EQ(nebula::cpp2::ErrorCode::E_INVALID_PARM, resp.get_code()); + ASSERT_EQ(nebula::cpp2::ErrorCode::E_ZONE_NOT_ENOUGH, resp.get_code()); } { cpp2::AddHostsIntoZoneReq req; @@ -3432,7 +3432,7 @@ TEST(ProcessorTest, DropHostsTest) { auto f = processor->getFuture(); processor->process(req); auto resp = std::move(f).get(); - ASSERT_EQ(nebula::cpp2::ErrorCode::E_INVALID_PARM, resp.get_code()); + ASSERT_EQ(nebula::cpp2::ErrorCode::E_ZONE_NOT_ENOUGH, resp.get_code()); } { // Drop hosts which hold partition. @@ -3762,6 +3762,16 @@ TEST(ProcessorTest, MergeZoneTest) { ASSERT_EQ("default_zone_127.0.0.1_8988", zones[2].get_zone_name()); ASSERT_EQ("default_zone_127.0.0.1_8989", zones[3].get_zone_name()); } + { + cpp2::MergeZoneReq req; + req.zones_ref() = {"default_zone_127.0.0.1_8986", "default_zone_127.0.0.1_8987"}; + req.zone_name_ref() = "default_zone_127.0.0.1_8988"; + auto* processor = MergeZoneProcessor::instance(kv.get()); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::E_INVALID_PARM, resp.get_code()); + } { // Merge an empty zone list cpp2::MergeZoneReq req; @@ -4102,6 +4112,38 @@ TEST(ProcessorTest, DivideZoneTest) { auto resp = std::move(f).get(); ASSERT_EQ(nebula::cpp2::ErrorCode::E_INVALID_PARM, resp.get_code()); } + { + cpp2::DivideZoneReq req; + req.zone_name_ref() = "default_zone"; + std::unordered_map> zoneItems; + std::vector oneHosts = { + {"127.0.0.1", 8986}, {"127.0.0.1", 8987}, {"127.0.0.1", 8988}}; + zoneItems.emplace("one_zone", std::move(oneHosts)); + std::vector anotherHosts = {{"127.0.0.1", 8988}, {"127.0.0.1", 8989}}; + zoneItems.emplace("another_zone", std::move(anotherHosts)); + req.zone_items_ref() = std::move(zoneItems); + auto* processor = DivideZoneProcessor::instance(kv.get()); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::E_INVALID_PARM, resp.get_code()); + } + { + cpp2::DivideZoneReq req; + req.zone_name_ref() = "default_zone"; + std::unordered_map> zoneItems; + std::vector oneHosts = { + {"127.0.0.1", 8986}, {"127.0.0.1", 8987}, {"127.0.0.1", 8987}}; + zoneItems.emplace("one_zone", std::move(oneHosts)); + std::vector anotherHosts = {{"127.0.0.1", 8988}, {"127.0.0.1", 8989}}; + zoneItems.emplace("another_zone", std::move(anotherHosts)); + req.zone_items_ref() = std::move(zoneItems); + auto* processor = DivideZoneProcessor::instance(kv.get()); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::E_INVALID_PARM, resp.get_code()); + } { cpp2::DivideZoneReq req; req.zone_name_ref() = "default_zone"; diff --git a/src/mock/MockData.cpp b/src/mock/MockData.cpp index 78811aead2f..664f6582de9 100644 --- a/src/mock/MockData.cpp +++ b/src/mock/MockData.cpp @@ -307,21 +307,29 @@ std::shared_ptr MockData::mockPlayerTagSchema(Object if (!hasProp) { return schema; } - schema->addField("name", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "")); + schema->addField( + "name", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "")->encode()); // only age filed has no default value and nullable is false schema->addField("age", PropertyType::INT64, 0, false); - schema->addField("playing", PropertyType::BOOL, 0, false, ConstantExpression::make(pool, true)); - schema->addField("career", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 10L)); - schema->addField("startYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); - schema->addField("endYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); - schema->addField("games", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); - schema->addField("avgScore", PropertyType::DOUBLE, 0, false, ConstantExpression::make(pool, 0.0)); - schema->addField("serveTeams", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); + schema->addField( + "playing", PropertyType::BOOL, 0, false, ConstantExpression::make(pool, true)->encode()); + schema->addField( + "career", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 10L)->encode()); + schema->addField( + "startYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)->encode()); + schema->addField( + "endYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)->encode()); + schema->addField( + "games", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)->encode()); + schema->addField( + "avgScore", PropertyType::DOUBLE, 0, false, ConstantExpression::make(pool, 0.0)->encode()); + schema->addField( + "serveTeams", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)->encode()); // Set ttl property if (FLAGS_mock_ttl_col) { schema->addField( - "insertTime", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); + "insertTime", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)->encode()); meta::cpp2::SchemaProp prop; prop.ttl_col_ref() = "insertTime"; prop.ttl_duration_ref() = FLAGS_mock_ttl_duration; @@ -329,8 +337,11 @@ std::shared_ptr MockData::mockPlayerTagSchema(Object } // Use default value - schema->addField( - "country", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "America")); + schema->addField("country", + PropertyType::STRING, + 0, + false, + ConstantExpression::make(pool, "America")->encode()); // Use nullable schema->addField("champions", PropertyType::INT64, 0, true); @@ -355,21 +366,27 @@ std::shared_ptr MockData::mockServeEdgeSchema(Object return schema; } schema->addField( - "playerName", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "")); - schema->addField("teamName", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "")); + "playerName", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "")->encode()); schema->addField( - "startYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 2020L)); - schema->addField("endYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 2020L)); + "teamName", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "")->encode()); + schema->addField( + "startYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 2020L)->encode()); + schema->addField( + "endYear", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 2020L)->encode()); // only teamCareer filed has no default value and nullable is false schema->addField("teamCareer", PropertyType::INT64, 0, false); - schema->addField("teamGames", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 1L)); schema->addField( - "teamAvgScore", PropertyType::DOUBLE, 0, false, ConstantExpression::make(pool, 0.0)); + "teamGames", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 1L)->encode()); + schema->addField("teamAvgScore", + PropertyType::DOUBLE, + 0, + false, + ConstantExpression::make(pool, 0.0)->encode()); // Set ttl property if (FLAGS_mock_ttl_col) { schema->addField( - "insertTime", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); + "insertTime", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)->encode()); meta::cpp2::SchemaProp prop; prop.ttl_col_ref() = "insertTime"; prop.ttl_duration_ref() = FLAGS_mock_ttl_duration; @@ -377,7 +394,8 @@ std::shared_ptr MockData::mockServeEdgeSchema(Object } // Use default value - schema->addField("type", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "trade")); + schema->addField( + "type", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "trade")->encode()); // Use nullable schema->addField("champions", PropertyType::INT64, 0, true); @@ -530,26 +548,41 @@ std::shared_ptr MockData::mockTypicaSchemaV2(ObjectP std::shared_ptr schema(new meta::NebulaSchemaProvider(0)); schema->addField("col_bool", PropertyType::BOOL); schema->addField("col_bool_null", PropertyType::BOOL, 0, true); - schema->addField( - "col_bool_default", PropertyType::BOOL, 0, false, ConstantExpression::make(pool, true)); + schema->addField("col_bool_default", + PropertyType::BOOL, + 0, + false, + ConstantExpression::make(pool, true)->encode()); schema->addField("col_int", PropertyType::INT64); schema->addField("col_int_null", PropertyType::INT64, 0, true); - schema->addField( - "col_int_default", PropertyType::INT64, 0, false, ConstantExpression::make(pool, 20L)); + schema->addField("col_int_default", + PropertyType::INT64, + 0, + false, + ConstantExpression::make(pool, 20L)->encode()); schema->addField("col_float", PropertyType::FLOAT); schema->addField("col_float_null", PropertyType::FLOAT, 0, true); - schema->addField( - "col_float_default", PropertyType::FLOAT, 0, false, ConstantExpression::make(pool, 2.2F)); + schema->addField("col_float_default", + PropertyType::FLOAT, + 0, + false, + ConstantExpression::make(pool, 2.2F)->encode()); schema->addField("col_str", PropertyType::STRING); schema->addField("col_str_null", PropertyType::STRING, 0, true); - schema->addField( - "col_str_default", PropertyType::STRING, 0, false, ConstantExpression::make(pool, "sky")); + schema->addField("col_str_default", + PropertyType::STRING, + 0, + false, + ConstantExpression::make(pool, "sky")->encode()); schema->addField("col_date", PropertyType::DATE); schema->addField("col_date_null", PropertyType::DATE, 0, true); const Date date = {2020, 2, 20}; - schema->addField( - "col_date_default", PropertyType::DATE, 0, false, ConstantExpression::make(pool, date)); + schema->addField("col_date_default", + PropertyType::DATE, + 0, + false, + ConstantExpression::make(pool, date)->encode()); return schema; } diff --git a/src/storage/StorageServer.cpp b/src/storage/StorageServer.cpp index e0a449cf734..eec3e5fff4a 100644 --- a/src/storage/StorageServer.cpp +++ b/src/storage/StorageServer.cpp @@ -198,6 +198,9 @@ bool StorageServer::start() { LOG(INFO) << "Init kvstore"; kvstore_ = getStoreInstance(); + LOG(INFO) << "Init LogMonitor"; + logMonitor_ = std::make_unique(); + if (nullptr == kvstore_) { LOG(ERROR) << "Init kvstore failed"; return false; diff --git a/src/storage/StorageServer.h b/src/storage/StorageServer.h index 2f907263ef9..3637f6f9091 100644 --- a/src/storage/StorageServer.h +++ b/src/storage/StorageServer.h @@ -11,6 +11,7 @@ #include "clients/meta/MetaClient.h" #include "common/base/Base.h" #include "common/hdfs/HdfsHelper.h" +#include "common/log/LogMonitor.h" #include "common/meta/IndexManager.h" #include "common/meta/SchemaManager.h" #include "kvstore/NebulaStore.h" @@ -97,6 +98,8 @@ class StorageServer final { // used for communicate between one storaged to another std::unique_ptr interClient_; + std::unique_ptr logMonitor_; + ServiceStatus serverStatus_{STATUS_UNINITIALIZED}; std::mutex muStop_; std::condition_variable cvStop_; diff --git a/src/storage/exec/IndexScanNode.h b/src/storage/exec/IndexScanNode.h index d0df49c5141..f7dc2e13595 100644 --- a/src/storage/exec/IndexScanNode.h +++ b/src/storage/exec/IndexScanNode.h @@ -282,7 +282,7 @@ class QualifiedStrategy { * [start,end) which is generated by `RangeIndex`. Therefore, it is necessary to make additional * judgment on the truncated string type index * For example: - * (ab)c meas that string is "abc" but index val has been truncated to "ab". (ab)c > ab is + * (ab)c means that string is "abc" but index val has been truncated to "ab". (ab)c > ab is * `UNCERTAIN`, and (ab)c > aa is COMPATIBLE. * * Args: diff --git a/src/storage/exec/QueryUtils.h b/src/storage/exec/QueryUtils.h index 20a8b5685b6..948235802b9 100644 --- a/src/storage/exec/QueryUtils.h +++ b/src/storage/exec/QueryUtils.h @@ -57,7 +57,9 @@ class QueryUtils final { VLOG(1) << "Fail to read prop " << propName; if (field->hasDefault()) { DefaultValueContext expCtx; - auto expr = field->defaultValue()->clone(); + ObjectPool pool; + auto& exprStr = field->defaultValue(); + auto expr = Expression::decode(&pool, folly::StringPiece(exprStr.data(), exprStr.size())); return Expression::eval(expr, expCtx); } else if (field->nullable()) { return NullType::__NULL__; diff --git a/src/storage/exec/UpdateNode.h b/src/storage/exec/UpdateNode.h index f000c21e87d..b2114e05878 100644 --- a/src/storage/exec/UpdateNode.h +++ b/src/storage/exec/UpdateNode.h @@ -54,7 +54,9 @@ class UpdateNode : public RelNode { nebula::cpp2::ErrorCode getDefaultOrNullValue(const meta::SchemaProviderIf::Field* field, const std::string& name) { if (field->hasDefault()) { - auto expr = field->defaultValue()->clone(); + ObjectPool pool; + auto& exprStr = field->defaultValue(); + auto expr = Expression::decode(&pool, folly::StringPiece(exprStr.data(), exprStr.size())); props_[field->name()] = Expression::eval(expr, *expCtx_); } else if (field->nullable()) { props_[name] = Value::kNullValue; diff --git a/src/storage/test/IndexWithTTLTest.cpp b/src/storage/test/IndexWithTTLTest.cpp index b5142e3517b..30b525e247d 100644 --- a/src/storage/test/IndexWithTTLTest.cpp +++ b/src/storage/test/IndexWithTTLTest.cpp @@ -67,8 +67,11 @@ void createSchema(meta::SchemaManager* schemaMan, auto* sm = reinterpret_cast(schemaMan); std::shared_ptr schema(new meta::NebulaSchemaProvider(0)); schema->addField("c1", nebula::cpp2::PropertyType::INT64, 0, false); - schema->addField( - "c2", nebula::cpp2::PropertyType::INT64, 0, false, ConstantExpression::make(pool, 0L)); + schema->addField("c2", + nebula::cpp2::PropertyType::INT64, + 0, + false, + ConstantExpression::make(pool, 0L)->encode()); meta::cpp2::SchemaProp prop; prop.ttl_col_ref() = "c2"; prop.ttl_duration_ref() = duration; diff --git a/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp b/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp index 0a3eed149db..1e58236c370 100644 --- a/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp +++ b/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp @@ -497,7 +497,9 @@ void ChainAddEdgesLocalProcessor::replaceNullWithDefaultValue(cpp2::AddEdgesRequ for (auto i = vals.size(); i < idxVec.size(); ++i) { auto field = schema->field(idxVec[i]); if (field->hasDefault()) { - auto expr = field->defaultValue()->clone(); + auto exprStr = field->defaultValue(); + ObjectPool pool; + auto expr = Expression::decode(&pool, folly::StringPiece(exprStr.data(), exprStr.size())); auto defVal = Expression::eval(expr, expCtx); switch (defVal.type()) { case Value::Type::BOOL: diff --git a/tests/Makefile b/tests/Makefile index 5acb5635fb8..0275c0f51f0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -2,7 +2,7 @@ # # This source code is licensed under Apache 2.0 License. -.PHONY: fmt check check-and-diff init init-all clean test tck fail up down test-all +.PHONY: fmt check check-and-diff init init-all clean test tck fail up down test-all ldbc PYPI_MIRROR = https://mirrors.aliyun.com/pypi/simple/ # PYPI_MIRROR = http://pypi.mirrors.ustc.edu.cn/simple --trusted-host pypi.mirrors.ustc.edu.cn @@ -95,7 +95,10 @@ slow-query: currdir tck: jobs slow-query $(test_j) tck/steps/test_tck.py -test-all: test tck +ldbc: currdir + $(test_j) tck/steps/test_ldbc.py + +test-all: test tck ldbc fail: currdir python3 -m pytest \ diff --git a/tests/common/utils.py b/tests/common/utils.py index 6f2f62ed0db..75323d0b83b 100644 --- a/tests/common/utils.py +++ b/tests/common/utils.py @@ -432,8 +432,9 @@ def load_csv_data( # wait heartbeat_interval_secs + 1 seconds for schema synchronization time.sleep(2) - for fd in config["files"]: - _load_data_from_file(sess, data_dir, fd) + if config["files"] is not None: + for fd in config["files"]: + _load_data_from_file(sess, data_dir, fd) return space_desc diff --git a/tests/conftest.py b/tests/conftest.py index 7802f31238d..4f79d1de866 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -216,6 +216,11 @@ def load_nba_int_vid_data(): yield load_csv_data_once("nba_int_vid") +@pytest.fixture(scope="session") +def load_ldbc_v0_3_3(): + yield load_csv_data_once("ldbc_v0_3_3") + + @pytest.fixture(scope="session") def load_student_data(): yield load_csv_data_once("student") diff --git a/tests/data/ldbc_v0_3_3/config.yaml b/tests/data/ldbc_v0_3_3/config.yaml new file mode 100644 index 00000000000..200afa3d2c7 --- /dev/null +++ b/tests/data/ldbc_v0_3_3/config.yaml @@ -0,0 +1,41 @@ +space: + name: ldbc_v0_3_3 + partitionNum: 8 + replicaFactor: 1 + vidType: FIXED_STRING(32) + charset: utf8 + collate: utf8_bin +schema: | + CREATE TAG IF NOT EXISTS `Place`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `City`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `Country`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `Continent`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `Message`(`creationDate` string,`locationIP` string,`browserUsed` string,`content` string,`length` int); + CREATE TAG IF NOT EXISTS `Comment`(`creationDate` string,`locationIP` string,`browserUsed` string,`content` string,`length` int); + CREATE TAG IF NOT EXISTS `Post`(`imageFile` string,`creationDate` string,`locationIP` string,`browserUsed` string,`language` string,`content` string,`length` int); + CREATE TAG IF NOT EXISTS `Organisation`(`type` string,`name` string,`url` string); + CREATE TAG IF NOT EXISTS `University`(`type` string,`name` string,`url` string); + CREATE TAG IF NOT EXISTS `Company`(`type` string,`name` string,`url` string); + CREATE TAG IF NOT EXISTS `Person`(`firstName` string,`lastName` string,`gender` string,`birthday` string,`creationDate` string,`locationIP` string,`browserUsed` string, `email` string, `speaks` string); + CREATE TAG IF NOT EXISTS `TagClass`(`name` string,`url` string); + CREATE TAG IF NOT EXISTS `Forum`(`title` string,`creationDate` string); + CREATE TAG IF NOT EXISTS `Tag`(`name` string,`url` string); + CREATE EDGE IF NOT EXISTS `IS_PART_OF`(); + CREATE EDGE IF NOT EXISTS `LIKES`(`creationDate` string); + CREATE EDGE IF NOT EXISTS `HAS_CREATOR`(); + CREATE EDGE IF NOT EXISTS `HAS_INTEREST`(); + CREATE EDGE IF NOT EXISTS `IS_SUBCLASS_OF`(); + CREATE EDGE IF NOT EXISTS `IS_LOCATED_IN`(); + CREATE EDGE IF NOT EXISTS `HAS_MODERATOR`(); + CREATE EDGE IF NOT EXISTS `HAS_TAG`(); + CREATE EDGE IF NOT EXISTS `WORK_AT`(`workFrom` int); + CREATE EDGE IF NOT EXISTS `REPLY_OF`(); + CREATE EDGE IF NOT EXISTS `STUDY_AT`(`classYear` int); + CREATE EDGE IF NOT EXISTS `CONTAINER_OF`(); + CREATE EDGE IF NOT EXISTS `HAS_MEMBER`(`joinDate` string); + CREATE EDGE IF NOT EXISTS `KNOWS`(`creationDate` string); + CREATE EDGE IF NOT EXISTS `HAS_TYPE`(); + CREATE TAG INDEX message_create_date ON `Message`(`creationDate`(20)); + CREATE TAG INDEX person_first_name ON `Person`(`firstName`(20)); +files: + diff --git a/tests/tck/conftest.py b/tests/tck/conftest.py index 13b2ee1fbe3..f2abc3e57a1 100644 --- a/tests/tck/conftest.py +++ b/tests/tck/conftest.py @@ -175,6 +175,7 @@ def preload_space( load_nba_data, load_nba_int_vid_data, load_student_data, + load_ldbc_v0_3_3, session, graph_spaces, ): @@ -185,6 +186,8 @@ def preload_space( graph_spaces["space_desc"] = load_nba_int_vid_data elif space == "student": graph_spaces["space_desc"] = load_student_data + elif space == "ldbc_v0_3_3": + graph_spaces["ldbc_v0_3_3"] = load_ldbc_v0_3_3 else: raise ValueError(f"Invalid space name given: {space}") resp_ok(session, f'USE {space};', True) diff --git a/tests/tck/features/match/Base.IntVid.feature b/tests/tck/features/match/Base.IntVid.feature index dad37208f31..eb6fd0f1d58 100644 --- a/tests/tck/features/match/Base.IntVid.feature +++ b/tests/tck/features/match/Base.IntVid.feature @@ -575,3 +575,17 @@ Feature: Basic match RETURN count(names) """ Then a SemanticError should be raised at runtime: To get the property of the vertex in `v3.name', should use the format `var.tag.prop' + + Scenario: filter is not a valid expression + When executing query: + """ + MATCH (v:player) WHERE v.player.name-'n'=="Tim Duncann" RETURN v + """ + Then a SemanticError should be raised at runtime: `(v.player.name-"n")' is not a valid expression, can not apply `-' to `__EMPTY__' and `STRING'. + When executing query: + """ + MATCH (v:player) WHERE v.player.name+'n'=="Tim Duncann" RETURN v + """ + Then the result should be, in any order: + | v | + | ("Tim Duncan":bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | diff --git a/tests/tck/features/match/Base.feature b/tests/tck/features/match/Base.feature index b86b7bab3cc..13573b18c47 100644 --- a/tests/tck/features/match/Base.feature +++ b/tests/tck/features/match/Base.feature @@ -685,3 +685,17 @@ Feature: Basic match RETURN count(names) """ Then a SemanticError should be raised at runtime: To get the property of the vertex in `v3.name', should use the format `var.tag.prop' + + Scenario: filter is not a valid expression + When executing query: + """ + MATCH (v:player) WHERE v.player.name-'n'=="Tim Duncann" RETURN v + """ + Then a SemanticError should be raised at runtime: `(v.player.name-"n")' is not a valid expression, can not apply `-' to `__EMPTY__' and `STRING'. + When executing query: + """ + MATCH (v:player) WHERE v.player.name+'n'=="Tim Duncann" RETURN v + """ + Then the result should be, in any order: + | v | + | ("Tim Duncan":bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | diff --git a/tests/tck/features/match/SeekById.feature b/tests/tck/features/match/SeekById.feature index 2b55ac72bd6..5ac4839f7ac 100644 --- a/tests/tck/features/match/SeekById.feature +++ b/tests/tck/features/match/SeekById.feature @@ -208,12 +208,23 @@ Feature: Match seek by id RETURN v.player.name AS Name, t.team.name AS Team """ Then the result should be, in any order: - | Name | Team | - | 'Paul Gasol' | 'Grizzlies' | - | 'Paul Gasol' | 'Lakers' | - | 'Paul Gasol' | 'Bulls' | - | 'Paul Gasol' | 'Spurs' | - | 'Paul Gasol' | 'Bucks' | + | Name | Team | + | "Paul Gasol" | "Bucks" | + | "Paul Gasol" | "Bulls" | + | "Rudy Gay" | "Grizzlies" | + | "Kyle Anderson" | "Grizzlies" | + | "Paul Gasol" | "Grizzlies" | + | "Marc Gasol" | "Grizzlies" | + | "Vince Carter" | "Grizzlies" | + | "Paul Gasol" | "Spurs" | + | "Dwight Howard" | "Lakers" | + | "Shaquille O'Neal" | "Lakers" | + | "Steve Nash" | "Lakers" | + | "Paul Gasol" | "Lakers" | + | "Kobe Bryant" | "Lakers" | + | "JaVale McGee" | "Lakers" | + | "Rajon Rondo" | "Lakers" | + | "LeBron James" | "Lakers" | Scenario: can't refer When executing query: @@ -243,7 +254,7 @@ Feature: Match seek by id WHERE (id(v) + '') == 'James Harden' RETURN v.player.name AS Name """ - Then a SemanticError should be raised at runtime: + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v) @@ -252,8 +263,7 @@ Feature: Match seek by id """ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. - @skip - Scenario: test OR logic (reason = "or logic optimization error") + Scenario: test OR logic When executing query: """ MATCH (v) @@ -261,13 +271,7 @@ Feature: Match seek by id OR v.player.age == 23 RETURN v.player.name AS Name """ - Then the result should be, in any order: - | Name | - | 'James Harden' | - | 'Jonathon Simmons' | - | 'Klay Thompson' | - | 'Dejounte Murray' | - | 'Kristaps Porzingis' | + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v) @@ -275,10 +279,7 @@ Feature: Match seek by id OR v.player.age == 23 RETURN v.player.name AS Name """ - Then the result should be, in any order: - | Name | - | 'James Harden' | - | 'Kristaps Porzingis' | + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v) @@ -286,8 +287,71 @@ Feature: Match seek by id OR v.player.age != 23 RETURN v.player.name AS Name """ + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. + When executing query: + """ + MATCH (v:player) + WHERE v.player.name == "Tim Duncan" + OR v.player.age == 23 + RETURN v + """ Then the result should be, in any order: - | Name | + | v | + | ("Kristaps Porzingis" :player{age: 23, name: "Kristaps Porzingis"}) | + | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | + When executing query: + """ + MATCH (v:player) + WHERE v.player.name == "Tim Duncan" + OR v.noexist.age == 23 + RETURN v + """ + Then the result should be, in any order: + | v | + | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | + When executing query: + """ + MATCH (v:player) + WHERE v.player.noexist == "Tim Duncan" + OR v.player.age == 23 + RETURN v + """ + Then the result should be, in any order: + | v | + | ("Kristaps Porzingis" :player{age: 23, name: "Kristaps Porzingis"}) | + When executing query: + """ + MATCH (v:player) + WHERE v.player.noexist == "Tim Duncan" + OR v.noexist.age == 23 + RETURN v + """ + Then the result should be, in any order: + | v | + When executing query: + """ + MATCH (v:player) + WHERE "Tim Duncan" == v.player.name + OR 23 + 1 == v.noexist.age - 3 + RETURN v + """ + Then the result should be, in any order: + | v | + | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | + When executing query: + """ + MATCH (v) + WHERE id(v) IN ['James Harden', 'Jonathon Simmons', 'Klay Thompson', 'Dejounte Murray'] + OR id(v) == 'Yao Ming' + RETURN v + """ + Then the result should be, in any order: + | v | + | ("James Harden" :player{age: 29, name: "James Harden"}) | + | ("Jonathon Simmons" :player{age: 29, name: "Jonathon Simmons"}) | + | ("Klay Thompson" :player{age: 29, name: "Klay Thompson"}) | + | ("Dejounte Murray" :player{age: 29, name: "Dejounte Murray"}) | + | ("Yao Ming" :player{age: 38, name: "Yao Ming"}) | Scenario: Start from end When executing query: diff --git a/tests/tck/features/match/SeekById.intVid.feature b/tests/tck/features/match/SeekById.intVid.feature index 70fec05da7b..e693b3b5dfe 100644 --- a/tests/tck/features/match/SeekById.intVid.feature +++ b/tests/tck/features/match/SeekById.intVid.feature @@ -208,12 +208,23 @@ Feature: Match seek by id RETURN v.player.name AS Name, t.team.name AS Team """ Then the result should be, in any order: - | Name | Team | - | 'Paul Gasol' | 'Grizzlies' | - | 'Paul Gasol' | 'Lakers' | - | 'Paul Gasol' | 'Bulls' | - | 'Paul Gasol' | 'Spurs' | - | 'Paul Gasol' | 'Bucks' | + | Name | Team | + | "Paul Gasol" | "Bucks" | + | "Paul Gasol" | "Bulls" | + | "Rudy Gay" | "Grizzlies" | + | "Kyle Anderson" | "Grizzlies" | + | "Paul Gasol" | "Grizzlies" | + | "Marc Gasol" | "Grizzlies" | + | "Vince Carter" | "Grizzlies" | + | "Paul Gasol" | "Spurs" | + | "Dwight Howard" | "Lakers" | + | "Shaquille O'Neal" | "Lakers" | + | "Steve Nash" | "Lakers" | + | "Paul Gasol" | "Lakers" | + | "Kobe Bryant" | "Lakers" | + | "JaVale McGee" | "Lakers" | + | "Rajon Rondo" | "Lakers" | + | "LeBron James" | "Lakers" | Scenario: can't refer When executing query: @@ -245,8 +256,7 @@ Feature: Match seek by id """ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. - @skip - Scenario: test OR logic (reason = "or logic optimization error") + Scenario: test OR logic When executing query: """ MATCH (v) @@ -254,13 +264,7 @@ Feature: Match seek by id OR v.player.age == 23 RETURN v.player.name AS Name """ - Then the result should be, in any order: - | Name | - | 'James Harden' | - | 'Jonathon Simmons' | - | 'Klay Thompson' | - | 'Dejounte Murray' | - | 'Kristaps Porzingis' | + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v) @@ -268,10 +272,7 @@ Feature: Match seek by id OR v.player.age == 23 RETURN v.player.name AS Name """ - Then the result should be, in any order: - | Name | - | 'James Harden' | - | 'Kristaps Porzingis' | + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v) @@ -279,8 +280,71 @@ Feature: Match seek by id OR v.player.age != 23 RETURN v.player.name AS Name """ + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. + When executing query: + """ + MATCH (v:player) + WHERE v.player.name == "Tim Duncan" + OR v.player.age == 23 + RETURN v.player.name as name + """ Then the result should be, in any order: - | Name | + | name | + | "Kristaps Porzingis" | + | "Tim Duncan" | + When executing query: + """ + MATCH (v:player) + WHERE v.player.name == "Tim Duncan" + OR v.noexist.age == 23 + RETURN v.player.name as name + """ + Then the result should be, in any order: + | name | + | "Tim Duncan" | + When executing query: + """ + MATCH (v:player) + WHERE v.player.noexist == "Tim Duncan" + OR v.player.age == 23 + RETURN v.player.name as name + """ + Then the result should be, in any order: + | name | + | "Kristaps Porzingis" | + When executing query: + """ + MATCH (v:player) + WHERE v.player.noexist == "Tim Duncan" + OR v.noexist.age == 23 + RETURN v + """ + Then the result should be, in any order: + | v | + When executing query: + """ + MATCH (v:player) + WHERE "Tim Duncan" == v.player.name + OR 23 + 1 == v.noexist.age - 3 + RETURN v.player.name as name + """ + Then the result should be, in any order: + | name | + | "Tim Duncan" | + When executing query: + """ + MATCH (v) + WHERE id(v) IN [hash('James Harden'), hash('Jonathon Simmons'), hash('Klay Thompson'), hash('Dejounte Murray')] + OR id(v) == hash('Yao Ming') + RETURN v.player.name as name + """ + Then the result should be, in any order: + | name | + | "James Harden" | + | "Jonathon Simmons" | + | "Klay Thompson" | + | "Dejounte Murray" | + | "Yao Ming" | Scenario: with arithmetic When executing query: diff --git a/tests/tck/features/optimizer/IndexScanRule.feature b/tests/tck/features/optimizer/IndexScanRule.feature index 34a19251298..906d774ef57 100644 --- a/tests/tck/features/optimizer/IndexScanRule.feature +++ b/tests/tck/features/optimizer/IndexScanRule.feature @@ -93,9 +93,10 @@ Feature: Match index selection And the execution plan should be: | id | name | dependencies | operator info | | 6 | Project | 2 | | - | 2 | AppendVertices | 5 | | - | 5 | IndexScan | 0 | | - | 0 | Start | | | + | 9 | Filter | 3 | | + | 3 | AppendVertices | 7 | | + | 7 | IndexScan | 2 | | + | 2 | Start | | | Scenario: degenerate to full tag scan When profiling query: diff --git a/tests/tck/ldbc/business_intelligence_workload/Read.feature b/tests/tck/ldbc/business_intelligence_workload/Read.feature new file mode 100644 index 00000000000..c913038b89d --- /dev/null +++ b/tests/tck/ldbc/business_intelligence_workload/Read.feature @@ -0,0 +1,558 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: LDBC Business Intelligence Workload - Read + + Background: + Given a graph with space named "ldbc_v0_3_3" + + Scenario: 1. Posting summary + When executing query: + """ + MATCH (message:Message) + WHERE message.Message.creationDate < "20110721220000000" + WITH count(message) AS totalMessageCountInt + WITH toFloat(totalMessageCountInt) AS totalMessageCount + MATCH (message:Message) + WHERE message.Message.creationDate < "20110721220000000" + AND message.Message.content IS NOT NULL + WITH + totalMessageCount, + message, + toInteger(message.Message.creationDate)/10000000000000 AS year + WITH + totalMessageCount, + year, + "Comment" IN tags(message) AS isComment, + CASE + WHEN message.Message.length < 40 THEN 0 + WHEN message.Message.length < 80 THEN 1 + WHEN message.Message.length < 160 THEN 2 + ELSE 3 + END AS lengthCategory, + count(message) AS messageCount, + floor(avg(message.Message.length)) AS averageMessageLength, + sum(message.Message.length) AS sumMessageLength + RETURN + year, + isComment, + lengthCategory, + messageCount, + averageMessageLength, + sumMessageLength, + messageCount / totalMessageCount AS percentageOfMessages + ORDER BY + year DESC, + isComment ASC, + lengthCategory ASC + """ + Then the result should be, in order: + | year | isComment | lengthCategory | messageCount | averageMessageLength | sumMessageLength | percentageOfMessages | + + Scenario: 2. Top tags for country, age, gender, time + When executing query: + """ + MATCH + (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person:Person) + <-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(`tag`:`Tag`) + WHERE message.Message.creationDate >= "20091231230000000" + AND message.Message.creationDate <= "20101107230000000" + AND (id(country) == "Ethiopia" OR id(country) == "Belarus") + WITH + country.Country.name AS countryName, + toInteger(message.Message.creationDate)/100000000000%100 AS month, + person.Person.gender AS gender, + floor((20130101 - person.Person.birthday) / 10000 / 5.0) AS ageGroup, + `tag`.`Tag`.name AS tagName, + message + WITH + countryName, month, gender, ageGroup, tagName, count(message) AS messageCount + WHERE messageCount > 100 + RETURN + countryName, + month, + gender, + ageGroup, + tagName, + messageCount + ORDER BY + messageCount DESC, + tagName ASC, + ageGroup ASC, + gender ASC, + month ASC, + countryName ASC + LIMIT 100 + """ + Then the result should be, in order: + | countryName | month | gender | ageGroup | tagName | messageCount | + + @skip + Scenario: 3. Tag evolution + # TODO: Need an index on tag `Tag`, and fix the expr rewrite bug on toInteger(message1.creationDate)/100000000000%100 + When executing query: + """ + WITH + 2010 AS year1, + 10 AS month1, + 2010 + toInteger(10 / 12.0) AS year2, + 10 % 12 + 1 AS month2 + MATCH (`tag`:`Tag`) + OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(`tag`) + WHERE toInteger(message1.creationDate)/10000000000000 == year1 + AND toInteger(message1.creationDate)/100000000000%100 == month1 + WITH year2, month2, `tag`, count(message1) AS countMonth1 + OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(`tag`) + WHERE toInteger(message2.creationDate)/10000000000000 == year2 + AND toInteger(message2.creationDate)/100000000000%100 == month2 + WITH + `tag`, + countMonth1, + count(message2) AS countMonth2 + RETURN + `tag`.name, + countMonth1, + countMonth2, + abs(countMonth1-countMonth2) AS diff + ORDER BY + diff DESC, + `tag`.name ASC + LIMIT 100 + """ + Then the result should be, in any order: + + Scenario: 4. Popular topics in a country + When executing query: + """ + MATCH + (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]-> + (post:Post)-[:HAS_TAG]->(:`Tag`)-[:HAS_TYPE]->(:TagClass {name: "MusicalArtist"}) + WHERE id(country) == "Burma" + RETURN + forum.Forum.id AS forumId, + forum.Forum.title AS forumTitle, + forum.Forum.creationDate AS forumCreationDate, + person.Person.id AS personId, + count(DISTINCT post) AS postCount + ORDER BY + postCount DESC, + forumId ASC + LIMIT 20 + """ + Then the result should be, in order: + | forumId | forumTitle | forumCreationDate | personId | postCount | + + Scenario: 5. Top posters in a country + When executing query: + """ + MATCH + (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + (person:Person)<-[:HAS_MEMBER]-(forum:Forum) + WHERE id(country) == "Belarus" + WITH forum, count(person) AS numberOfMembers, forum.Forum.id AS forumId + ORDER BY numberOfMembers DESC, forumId ASC + LIMIT 100 + WITH collect(forum) AS popularForums + UNWIND popularForums AS forum + MATCH + (forum)-[:HAS_MEMBER]->(person:Person) + OPTIONAL MATCH + (person)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(popularForum:Forum) + WHERE popularForum IN popularForums + RETURN + person.Person.id AS personId, + person.Person.firstName AS personFirstName, + person.Person.lastName AS personLastName, + person.Person.creationDate AS personCreationDate, + count(DISTINCT post) AS postCount + ORDER BY + postCount DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | personFirstName | personLastName | personCreationDate | postCount | + + Scenario: 6. Most active posters of a given topic + When executing query: + """ + MATCH (`tag`:`Tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) + WHERE id(`tag`) == "Abbas_I_of_Persia" + OPTIONAL MATCH (:Person)-[like:LIKES]->(message) + OPTIONAL MATCH (message)<-[:REPLY_OF]-(comment:`Comment`) + WITH person, count(DISTINCT like) AS likeCount, count(DISTINCT comment) AS replyCount, count(DISTINCT message) AS messageCount + RETURN + person.Person.id AS personId, + replyCount, + likeCount, + messageCount, + 1*messageCount + 2*replyCount + 10*likeCount AS score + ORDER BY + score DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | replyCount | likeCount | messageCount | score | + + Scenario: 7. Most authoritative users on a given topic + When executing query: + """ + MATCH (`tag`:`Tag`) + WHERE id(`tag`) == "Arnold_Schwarzenegger" + MATCH (`tag`)<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person) + MATCH (`tag`)<-[:HAS_TAG]-(message2:Message)-[:HAS_CREATOR]->(person1) + OPTIONAL MATCH (message2)<-[:LIKES]-(person2:Person) + OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message3:Message)<-[like:LIKES]-(p3:Person) + RETURN + person1.Person.id AS person1Id, + count(DISTINCT like) AS authorityScore + ORDER BY + authorityScore DESC, + person1Id ASC + LIMIT 100 + """ + Then the result should be, in order: + | person1Id | authorityScore | + + Scenario: 8. Related topics + # NOTICE: I had rewrite the original query + # TODO: WHERE NOT (comment)-[:HAS_TAG]->(tag) + When executing query: + """ + MATCH + (`tag`:`Tag`)<-[:HAS_TAG]-(message:Message), + (message)<-[:REPLY_OF]-(comment:`Comment`)-[:HAS_TAG]->(relatedTag:`Tag`) + WHERE id(`tag`) == "Genghis_Khan" AND NOT `tag` == relatedTag + RETURN + relatedTag.`Tag`.name AS relatedTagName, + count(DISTINCT comment) AS count + ORDER BY + count DESC, + relatedTagName ASC + LIMIT 100 + """ + Then the result should be, in order: + | relatedTagName | count | + + Scenario: 9. Forum with related tags + When executing query: + """ + MATCH + (forum:Forum)-[:HAS_MEMBER]->(person:Person) + WITH forum, count(person) AS members + WHERE members > 200 + MATCH + (forum)-[:CONTAINER_OF]->(post1:Post)-[:HAS_TAG]-> + (:`Tag`)-[:HAS_TYPE]->(:TagClass {name: "BaseballPlayer"}) + WITH forum, count(DISTINCT post1) AS count1 + MATCH + (forum)-[:CONTAINER_OF]->(post2:Post)-[:HAS_TAG]-> + (:`Tag`)-[:HAS_TYPE]->(:TagClass {name: "ChristianBishop"}) + WITH forum, count1, count(DISTINCT post2) AS count2 + RETURN + forum.Forum.id AS forumId, + count1, + count2, + abs(count2-count1) AS diff + ORDER BY + diff DESC, + forumId ASC + LIMIT 100 + """ + Then the result should be, in order: + | forumId | count1 | count2 | diff | + + @skip + Scenario: 10. Central person for a tag + # TODO: 100 * length([(`tag`)<-[interest:HAS_INTEREST]-(friend) | interest]) + When executing query: + """ + MATCH (`tag`:`Tag`) + WHERE id(`tag`) == "John_Rhys-Davies" + OPTIONAL MATCH (`tag`)<-[interest:HAS_INTEREST]-(person:Person) + WITH `tag`, collect(person) AS interestedPersons + OPTIONAL MATCH (`tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) + WHERE message.creationDate > "20120122000000000" + WITH `tag`, interestedPersons + collect(person) AS persons + UNWIND persons AS person + WITH DISTINCT `tag`, person + WITH + `tag`, + person, + 100 * length([(`tag`)<-[interest:HAS_INTEREST]-(person) | interest]) + + length([(`tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person) WHERE message.creationDate > $date | message]) + AS score + OPTIONAL MATCH (person)-[:KNOWS]-(friend) + WITH + person, + score, + 100 * length([(`tag`)<-[interest:HAS_INTEREST]-(friend) | interest]) + + length([(`tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(friend) WHERE message.creationDate > $date | message]) + AS friendScore + RETURN + person.id, + score, + sum(friendScore) AS friendsScore + ORDER BY + score + friendsScore DESC, + person.id ASC + LIMIT 100 + """ + Then a SyntaxError should be raised at runtime: + + @skip + Scenario: 11. Unrelated replies + # TODO: WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) + When executing query: + """ + WITH ['also', 'Pope', 'that', 'James', 'Henry', 'one', 'Green'] AS blacklist + MATCH + (country:Country {name: "Germany"})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + (person:Person)<-[:HAS_CREATOR]-(reply:Comment)-[:REPLY_OF]->(message:Message), + (reply)-[:HAS_TAG]->(tag:Tag) + WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) + AND size([word IN blacklist WHERE reply.content CONTAINS word | word]) = 0 + OPTIONAL MATCH + (:Person)-[like:LIKES]->(reply) + RETURN + person.id, + tag.name, + count(DISTINCT like) AS countLikes, + count(DISTINCT reply) AS countReplies + ORDER BY + countLikes DESC, + person.id ASC, + tag.name ASC + LIMIT 100 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 12. Trending posts + When executing query: + """ + MATCH + (message:Message)-[:HAS_CREATOR]->(creator:Person), + (message)<-[like:LIKES]-(:Person) + WHERE message.Message.creationDate > "20110721220000000" + WITH message, creator, count(like) AS likeCount + WHERE likeCount > 400 + RETURN + message.Message.id AS messageId, + message.Message.creationDate AS messageCreationDate, + creator.Person.firstName AS creatorFirstName, + creator.Person.lastName AS creatorLastName, + likeCount + ORDER BY + likeCount DESC, + messageId ASC + LIMIT 100 + """ + Then the result should be, in order: + | messageId | messageCreationDate | creatorFirstName | creatorLastName | likeCount | + + Scenario: 13. Popular tags per month in a country + When executing query: + """ + MATCH (country:Country)<-[:IS_LOCATED_IN]-(message:Message) + WHERE id(country) == "Burma" + OPTIONAL MATCH (message)-[:HAS_TAG]->(`tag`:`Tag`) + WITH + toInteger(message.Message.creationDate)/10000000000000 AS year, + toInteger(message.Message.creationDate)/100000000000%100 AS month, + message, + `tag` + WITH year, month, count(message) AS popularity, `tag`, `tag`.`Tag`.name AS tagName + ORDER BY popularity DESC, tagName ASC + WITH + year, + month, + collect([`tag`.`Tag`.name, popularity]) AS popularTags + WITH + year, + month, + [popularTag IN popularTags WHERE popularTag[0] IS NOT NULL] AS popularTags + RETURN + year, + month, + popularTags[0..5] AS topPopularTags + ORDER BY + year DESC, + month ASC + LIMIT 100 + """ + Then the result should be, in order: + | year | month | topPopularTags | + + Scenario: 14. Top thread initiators + # TODO: [:REPLY_OF*0..] + When executing query: + """ + MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..100]-(reply:Message) + WHERE reply.Message.creationDate >= "20120531220000000" + AND reply.Message.creationDate <= "20120630220000000" + WITH person, post, reply + WHERE post.Post.creationDate >= "20120531220000000" + AND post.Post.creationDate <= "20120630220000000" + RETURN + person.Person.id AS personId, + person.Person.firstName AS personFirstName, + person.Person.lastName AS personLastName, + count(DISTINCT post) AS threadCount, + count(DISTINCT reply) AS messageCount + ORDER BY + messageCount DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | threadCount | messageCount | + + Scenario: 15. Social normals + When executing query: + """ + MATCH + (country:Country) + WHERE id(country) == "Burma" + MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person1:Person) + OPTIONAL MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), + (person1)-[:KNOWS]-(friend1) + WITH country, person1, count(friend1) AS friend1Count + WITH country, avg(friend1Count) AS socialNormalFloat + WITH country, floor(socialNormalFloat) AS socialNormal + MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) + OPTIONAL MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend2:Person)-[:KNOWS]-(person2) + WITH country, person2, count(friend2) AS friend2Count, socialNormal + WHERE friend2Count == socialNormal + RETURN + person2.Person.id AS person2Id, + friend2Count AS count + ORDER BY + person2Id ASC + LIMIT 100 + """ + Then the result should be, in order: + | person2Id | count | + + Scenario: 16. Experts in social circle + When executing query: + """ + MATCH + (n:Person)-[:KNOWS*3..5]-(person:Person) + WHERE id(n) == "19791209310731" + WITH DISTINCT person + MATCH + (person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(:Country {name: "Pakistan"}), + (person)<-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(:`Tag`)-[:HAS_TYPE]-> + (:TagClass {name: "MusicalArtist"}) + MATCH + (message)-[:HAS_TAG]->(`tag`:`Tag`) + RETURN + person.Person.id AS personId, + `tag`.`Tag`.name AS tagName, + count(DISTINCT message) AS messageCount + ORDER BY + messageCount DESC, + tagName ASC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | tagName | messageCount | + + Scenario: 17. Friend triangles + When executing query: + """ + MATCH (country:Country) + WHERE id(country) == "Spain" + MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + MATCH (b:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + MATCH (c:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + MATCH (a)-[:KNOWS]-(b), (b)-[:KNOWS]-(c), (c)-[:KNOWS]-(a) + WHERE a.Person.id < b.Person.id + AND b.Person.id < c.Person.id + RETURN count(*) AS count + """ + Then the result should be, in any order: + | count | + | 0 | + + Scenario: 18. How many persons have a given number of messages + # TODO: [:REPLY_OF*0..] + When executing query: + """ + MATCH (person:Person) + OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..100]->(post:Post) + WHERE message.Message.content IS NOT NULL + AND message.Message.length < 20 + AND message.Message.creationDate > "20110722000000000" + AND post.Post.language IN ["ar"] + WITH + person, + count(message) AS messageCount + RETURN + messageCount, + count(person) AS personCount + ORDER BY + personCount DESC, + messageCount DESC + """ + Then the result should be, in order: + | messageCount | personCount | + + Scenario: 19. Stranger’s interaction + # NOTICE: A big rewritten, have to test the correctness + When executing query: + """ + MATCH + (tagClass:TagClass)<-[:HAS_TYPE]-(:`Tag`)<-[:HAS_TAG]- + (forum1:Forum)-[:HAS_MEMBER]->(stranger:Person) + WHERE id(tagClass) == "MusicalArtist" + WITH DISTINCT stranger + MATCH + (tagClass:TagClass)<-[:HAS_TYPE]-(:`Tag`)<-[:HAS_TAG]- + (forum2:Forum)-[:HAS_MEMBER]->(stranger) + WHERE id(tagClass) == "OfficeHolder" + WITH DISTINCT stranger + MATCH + (person:Person)<-[:HAS_CREATOR]-(comment:`Comment`)-[:REPLY_OF*100]->(message:Message)-[:HAS_CREATOR]->(stranger) + OPTIONAL MATCH (person)-[knows:KNOWS]-(stranger) + OPTIONAL MATCH (message)-[replyOf:REPLY_OF*100]->(:Message)-[hasCreator:HAS_CREATOR]->(stranger) + WHERE person.Person.birthday > "19890101" + AND person <> stranger + AND knows IS NULL + AND (replyOf IS NULL OR hasCreator IS NULL) + RETURN + person.Person.id AS personId, + count(DISTINCT stranger) AS strangersCount, + count(comment) AS interactionCount + ORDER BY + interactionCount DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | strangersCount | interactionCount | + + Scenario: 20. High-level topics + When executing query: + """ + MATCH + (tagClass:TagClass)<-[:IS_SUBCLASS_OF*0..100]- + (:TagClass)<-[:HAS_TYPE]-(`tag`:`Tag`)<-[:HAS_TAG]-(message:Message) + WHERE id(tagClass) IN ['Writer', 'Single', 'Country'] + RETURN + tagClass.TagClass.name AS tagClassName, + count(DISTINCT message) AS messageCount + ORDER BY + messageCount DESC, + tagClassName ASC + LIMIT 100 + """ + Then the result should be, in order: + | tagClassName | messageCount | diff --git a/tests/tck/ldbc/interactive_workload/ComplexReads.feature b/tests/tck/ldbc/interactive_workload/ComplexReads.feature new file mode 100644 index 00000000000..52acc82bda7 --- /dev/null +++ b/tests/tck/ldbc/interactive_workload/ComplexReads.feature @@ -0,0 +1,349 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: LDBC Interactive Workload - Complex Reads + + Background: + Given a graph with space named "ldbc_v0_3_3" + + @skip + Scenario: 1. Friends with certain name + # TODO: shortestPath syntax is not supported for now + When executing query: + """ + MATCH p=shortestPath((person:Person)-[path:KNOWS*1..3]-(friend:Person {firstName: "$firstName"})) + WHERE id(person) == "" + WHERE person <> friend + WITH friend, length(p) AS distance + ORDER BY distance ASC, friend.lastName ASC, toInteger(friend.id) ASC + LIMIT 20 + MATCH (friend)-[:IS_LOCATED_IN]->(friendCity:Place) + OPTIONAL MATCH (friend)-[studyAt:STUDY_AT]->(uni:Organisation)-[:IS_LOCATED_IN]->(uniCity:Place) + WITH + friend, + collect( + CASE uni.name + WHEN null THEN null + ELSE [uni.name, studyAt.classYear, uniCity.name] + END + ) AS unis, + friendCity, + distance + OPTIONAL MATCH (friend)-[workAt:WORK_AT]->(company:Organisation)-[:IS_LOCATED_IN]->(companyCountry:Place) + WITH + friend, + collect( + CASE company.name + WHEN null THEN null + ELSE [company.name, workAt.workFrom, companyCountry.name] + END + ) AS companies, + unis, + friendCity, + distance + RETURN + friend.id AS friendId, + friend.lastName AS friendLastName, + distance AS distanceFromPerson, + friend.birthday AS friendBirthday, + friend.creationDate AS friendCreationDate, + friend.gender AS friendGender, + friend.browserUsed AS friendBrowserUsed, + friend.locationIP AS friendLocationIp, + friend.email AS friendEmails, + friend.speaks AS friendLanguages, + friendCity.name AS friendCityName, + unis AS friendUniversities, + companies AS friendCompanies + ORDER BY distanceFromPerson ASC, friendLastName ASC, toInteger(friendId) ASC + LIMIT 20 + """ + Then a SyntaxError should be raised at runtime: syntax error near `shortestPath' + + Scenario: 2. Recent messages by your friends + When executing query: + """ + MATCH (n:Person)-[:KNOWS]-(friend:Person)<-[:HAS_CREATOR]-(message:Message) + WHERE id(n) == "" and message.Message.creationDate <= $maxDate + RETURN + friend.Person.id AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + toInteger(message.Message.id) AS messageId, + CASE exists(message.Message.content) + WHEN true THEN message.Message.content + ELSE message.Message.imageFile + END AS messageContent, + message.Message.creationDate AS messageCreationDate + ORDER BY messageCreationDate DESC, messageId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | messageId | messageContent | messageCreationDate | + + @skip + Scenario: 3. Friends and friends of friends that have been to given countries + When executing query: + # TODO: WHERE not((friend)-[:IS_LOCATED_IN]->()-[:IS_PART_OF]->(countryX)) not supported now + """ + MATCH (person:Person)-[:KNOWS*1..2]-(friend:Person)<-[:HAS_CREATOR]-(messageX:Message), + (messageX)-[:IS_LOCATED_IN]->(countryX:Place) + WHERE + id(person) == "" + AND not(person==friend) + AND not((friend)-[:IS_LOCATED_IN]->()-[:IS_PART_OF]->(countryX)) + AND countryX.name=$countryXName AND messageX.creationDate>=$startDate + AND messageX.creationDate<$endDate + WITH friend, count(DISTINCT messageX) AS xCount + MATCH (friend)<-[:HAS_CREATOR]-(messageY:Message)-[:IS_LOCATED_IN]->(countryY:Place) + WHERE + countryY.name="$countryYName" + AND not((friend)-[:IS_LOCATED_IN]->()-[:IS_PART_OF]->(countryY)) + AND messageY.creationDate>="$startDate" + AND messageY.creationDate<"$endDate" + WITH + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + xCount, + count(DISTINCT messageY) AS yCount + RETURN + personId, + personFirstName, + personLastName, + xCount, + yCount, + xCount + yCount AS count + ORDER BY count DESC, toInteger(personId) ASC + LIMIT 20 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 4. New topics + When executing query: + """ + MATCH (person:Person)-[:KNOWS]-(:Person)<-[:HAS_CREATOR]-(post:Post)-[:HAS_TAG]->(`tag`:`Tag`) + WHERE id(person) == "" AND post.Post.creationDate >= "$startDate" + AND post.Post.creationDate < "$endDate" + WITH person, count(post) AS postsOnTag, `tag` + OPTIONAL MATCH (person)-[:KNOWS]-()<-[:HAS_CREATOR]-(oldPost:Post)-[:HAS_TAG]->(`tag`) + WHERE oldPost.Post.creationDate < $startDate + WITH person, postsOnTag, `tag`, count(oldPost) AS cp + WHERE cp == 0 + RETURN + `tag`.`Tag`.name AS tagName, + sum(postsOnTag) AS postCount + ORDER BY postCount DESC, tagName ASC + """ + Then the result should be, in any order: + | tagName | postCount | + + Scenario: 5. New groups + When executing query: + """ + MATCH (person:Person)-[:KNOWS*1..2]-(friend:Person)<-[membership:HAS_MEMBER]-(forum:Forum) + WHERE id(person) == "" AND membership.joinDate>"$minDate" + AND not(person==friend) + WITH DISTINCT friend, forum + OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(forum) + WITH forum, count(post) AS postCount + RETURN + toInteger(forum.Forum.id) AS forumId, + forum.Forum.title AS forumTitle, + postCount + ORDER BY postCount DESC, forumId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | forumId | forumTitle | postCount | + + @skip + Scenario: 6. Tag co-occurrence + # TODO: WHERE (commonPost)-[:HAS_CREATOR]->(friend) + When executing query: + """ + MATCH + (person:Person)-[:KNOWS*1..2]-(friend:Person), + (friend)<-[:HAS_CREATOR]-(friendPost:Post)-[:HAS_TAG]->(knownTag:`Tag` {name:"$tagName"}) + WHERE id(person) == "" AND not(person==friend) + MATCH (friendPost)-[:HAS_TAG]->(commonTag:`Tag`) + WHERE not(commonTag==knownTag) + WITH DISTINCT commonTag, knownTag, friend + MATCH (commonTag)<-[:HAS_TAG]-(commonPost:Post)-[:HAS_TAG]->(knownTag) + WHERE (commonPost)-[:HAS_CREATOR]->(friend) + RETURN + commonTag.name AS tagName, + count(commonPost) AS postCount + ORDER BY postCount DESC, tagName ASC + LIMIT 10 + """ + Then a SyntaxError should be raised at runtime: + + @skip + Scenario: 7. Recent likers + # TODO: RETURN not((liker)-[:KNOWS]-(person)) AS isNew + When executing query: + """ + MATCH (person:Person)<-[:HAS_CREATOR]-(message:Message)<-[like:LIKES]-(liker:Person) + WHERE id(person) == "" + WITH liker, message, like.creationDate AS likeTime, person + ORDER BY likeTime DESC, toInteger(message.id) ASC + WITH + liker, + head(collect({msg: message, likeTime: likeTime})) AS latestLike, + person + RETURN + toInteger(liker.id) AS personId, + liker.firstName AS personFirstName, + liker.lastName AS personLastName, + latestLike.likeTime AS likeCreationDate, + latestLike.msg.id AS messageId, + CASE exists(latestLike.msg.content) + WHEN true THEN latestLike.msg.content + ELSE latestLike.msg.imageFile + END AS messageContent, + latestLike.msg.creationDate AS messageCreationDate, + not((liker)-[:KNOWS]-(person)) AS isNew + ORDER BY likeCreationDate DESC, personId ASC + LIMIT 20 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 8. Recent replies + When executing query: + """ + MATCH + (start:Person)<-[:HAS_CREATOR]-(:Message)<-[:REPLY_OF]-(comment:`Comment`)-[:HAS_CREATOR]->(person:Person) + WHERE id(start) == "" + RETURN + person.Person.id AS personId, + person.Person.firstName AS personFirstName, + person.Person.lastName AS personLastName, + comment.`Comment`.creationDate AS commentCreationDate, + toInteger(comment.`Comment`.id) AS commentId, + comment.`Comment`.content AS commentContent + ORDER BY commentCreationDate DESC, commentId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | commentCreationDate | commentId | commentContent | + + Scenario: 9. Recent messages by friends or friends of friends + When executing query: + """ + MATCH (n:Person)-[:KNOWS*1..2]-(friend:Person)<-[:HAS_CREATOR]-(message:Message) + WHERE id(n) == "" AND message.Message.creationDate < "$maxDate" + RETURN DISTINCT + friend.Person.id AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + toInteger(message.Message.id) AS messageId, + CASE exists(message.Message.content) + WHEN true THEN message.Message.content + ELSE message.Message.imageFile + END AS messageContent, + message.Message.creationDate AS messageCreationDate + ORDER BY messageCreationDate DESC, messageId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | messageId | messageContent | messageCreationDate | + + @skip + Scenario: 10. Friend recommendation + # TODO: WHERE patterns, WITH patterns + When executing query: + """ + MATCH (person:Person)-[:KNOWS*2..2]-(friend:Person)-[:IS_LOCATED_IN]->(city:Place) + WHERE id(person) == "" AND + ((friend.birthday/100%100 = "$month" AND friend.birthday%100 >= 21) OR + (friend.birthday/100%100 = "$nextMonth" AND friend.birthday%100 < 22)) + AND not(friend=person) + AND not((friend)-[:KNOWS]-(person)) + WITH DISTINCT friend, city, person + OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post:Post) + WITH friend, city, collect(post) AS posts, person + WITH + friend, + city, + length(posts) AS postCount, + length([p IN posts WHERE (p)-[:HAS_TAG]->(:`Tag`)<-[:HAS_INTEREST]-(person)]) AS commonPostCount + RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + commonPostCount - (postCount - commonPostCount) AS commonInterestScore, + friend.gender AS personGender, + city.name AS personCityName + ORDER BY commonInterestScore DESC, toInteger(personId) ASC + LIMIT 10 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 11. Job referral + When executing query: + """ + MATCH (person:Person)-[:KNOWS*1..2]-(friend:Person) + WHERE id(person) == "" AND not(person==friend) + WITH DISTINCT friend + MATCH (friend)-[workAt:WORK_AT]->(company:Organisation)-[:IS_LOCATED_IN]->(:Place {name:"$countryName"}) + WHERE workAt.workFrom < $workFromYear + RETURN + toInteger(friend.Person.id) AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + company.Organisation.name AS organizationName, + workAt.workFrom AS organizationWorkFromYear + ORDER BY organizationWorkFromYear ASC, personId ASC, organizationName DESC + LIMIT 10 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | organizationName | organizationWorkFromYear | + + Scenario: 12. Expert search + # TODO: [:IS_SUBCLASS_OF*0..] + When executing query: + """ + MATCH (n:Person)-[:KNOWS]-(friend:Person)<-[:HAS_CREATOR]-(`comment`:`Comment`)-[:REPLY_OF]->(:Post)-[:HAS_TAG]->(`tag`:`Tag`), + (`tag`)-[:HAS_TYPE]->(tagClass:TagClass)-[:IS_SUBCLASS_OF*0..100]->(baseTagClass:TagClass) + WHERE id(n)=="" AND (tagClass.TagClass.name == "$tagClassName" OR baseTagClass.TagClass.name == "$tagClassName") + RETURN + toInteger(friend.Person.id) AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + collect(DISTINCT `tag`.`Tag`.name) AS tagNames, + count(DISTINCT `comment`) AS replyCount + ORDER BY replyCount DESC, personId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | tagNames | replyCount | + + @skip + Scenario: 13. Single shortest path + # TODO: shortestPath + When executing query: + """ + MATCH (person1:Person {id:$person1Id}), (person2:Person {id:$person2Id}) + OPTIONAL MATCH path = shortestPath((person1)-[:KNOWS*]-(person2)) + RETURN + CASE path IS NULL + WHEN true THEN -1 + ELSE length(path) + END AS shortestPathLength; + """ + Then a SyntaxError should be raised at runtime: + + @skip + Scenario: 14. Trusted connection paths + # TODO: allShortestPaths + When executing query: + """ + MATCH path = allShortestPaths((person1:Person {id:$person1Id})-[:KNOWS*..15]-(person2:Person {id:$person2Id})) + WITH nodes(path) AS pathNodes + RETURN + extract(n IN pathNodes | n.id) AS personIdsInPath, + reduce(weight=0.0, idx IN range(1,size(pathNodes)-1) | extract(prev IN [pathNodes[idx-1]] | extract(curr IN [pathNodes[idx]] | weight + length((curr)<-[:HAS_CREATOR]-(:Comment)-[:REPLY_OF]->(:Post)-[:HAS_CREATOR]->(prev))*1.0 + length((prev)<-[:HAS_CREATOR]-(:Comment)-[:REPLY_OF]->(:Post)-[:HAS_CREATOR]->(curr))*1.0 + length((prev)-[:HAS_CREATOR]-(:Comment)-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]-(curr))*0.5) )[0][0]) AS pathWight + ORDER BY pathWight DESC + """ + Then a SyntaxError should be raised at runtime: diff --git a/tests/tck/ldbc/interactive_workload/ShortReads.feature b/tests/tck/ldbc/interactive_workload/ShortReads.feature new file mode 100644 index 00000000000..f7c40564202 --- /dev/null +++ b/tests/tck/ldbc/interactive_workload/ShortReads.feature @@ -0,0 +1,131 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: LDBC Interactive Workload - Short Reads + + Background: + Given a graph with space named "ldbc_v0_3_3" + + Scenario: 1. Friends with certain name + When executing query: + """ + MATCH (n:Person)-[:IS_LOCATED_IN]->(p:Place) + WHERE id(n)=="" + RETURN + n.Person.firstName AS firstName, + n.Person.lastName AS lastName, + n.Person.birthday AS birthday, + n.Person.locationIP AS locationIP, + n.Person.browserUsed AS browserUsed, + p.Place.id AS cityId, + n.Person.gender AS gender, + n.Person.creationDate AS creationDate + """ + Then the result should be, in any order: + | firstName | lastName | birthday | locationIP | browserUsed | cityId | gender | creationDate | + + Scenario: 2. Recent messages of a person + # TODO: [:REPLY_OF*0..] is not supported, instead by [:REPLY_OF*0..100] for now + When executing query: + """ + MATCH (n:Person)<-[:HAS_CREATOR]-(m:Message)-[:REPLY_OF*0..100]->(p:Post) + WHERE id(n)=="" + MATCH (p)-[:HAS_CREATOR]->(c) + RETURN + m.Message.id as messageId, + CASE exists(m.Message.content) + WHEN true THEN m.Message.content + ELSE m.Message.imageFile + END AS messageContent, + m.Message.creationDate AS messageCreationDate, + p.Post.id AS originalPostId, + c.Person.id AS originalPostAuthorId, + c.Person.firstName as originalPostAuthorFirstName, + c.Person.lastName as originalPostAuthorLastName + ORDER BY messageCreationDate DESC + LIMIT 10 + """ + Then the result should be, in any order: + | messageId | messageContent | messageCreationDate | originalPostId | originalPostAuthorId | originalPostAuthorFirstName | originalPostAuthorLastName | + + Scenario: 3. Friends of a person + When executing query: + """ + MATCH (n:Person)-[r:KNOWS]-(friend) + WHERE id(n) == "" + RETURN + toInteger(friend.Person.id) AS personId, + friend.Person.firstName AS firstName, + friend.Person.lastName AS lastName, + r.creationDate AS friendshipCreationDate + ORDER BY friendshipCreationDate DESC, personId ASC + """ + Then the result should be, in any order: + | personId | firstName | lastName | friendshipCreationDate | + + Scenario: 4. Content of a message + When executing query: + """ + MATCH (m:Message) + WHERE id(m) == "" + RETURN + m.Message.creationDate as messageCreationDate, + CASE exists(m.Message.content) + WHEN true THEN m.Message.content + ELSE m.Message.imageFile + END AS messageContent + """ + Then the result should be, in any order: + | messageCreationDate | messageContent | + + Scenario: 5. Given a Message, retrieve its author + When executing query: + """ + MATCH (m:Message)-[:HAS_CREATOR]->(p:Person) + WHERE id(m) == "" + RETURN + p.Person.id AS personId, + p.Person.firstName AS firstName, + p.Person.lastName AS lastName + """ + Then the result should be, in any order: + | personId | firstName | lastName | + + Scenario: 6. Forum of a message + # TODO: [:REPLY_OF*0..] is not supported, instead by [:REPLY_OF*0..100] for now + When executing query: + """ + MATCH (m:Message)-[:REPLY_OF*0..100]->(p:Post)<-[:CONTAINER_OF]-(f:Forum)-[:HAS_MODERATOR]->(mod:Person) + WHERE id(m) == "" + RETURN + f.Forum.id AS forumId, + f.Forum.title AS forumTitle, + mod.Person.id AS moderatorId, + mod.Person.firstName AS moderatorFirstName, + mod.Person.lastName AS moderatorLastName + """ + Then the result should be, in any order: + | forumId | forumTitle | moderatorId | moderatorFirstName | moderatorLastName | + + Scenario: 7. Replies of a message + # Notice: Comment is a keyword, instead by `Comment` + When executing query: + """ + MATCH (m:Message)<-[:REPLY_OF]-(c:`Comment`)-[:HAS_CREATOR]->(p:Person) + WHERE id(m) == "" + OPTIONAL MATCH (m)-[:HAS_CREATOR]->(a:Person)-[r:KNOWS]-(p) + RETURN + c.`Comment`.id AS commentId, + c.`Comment`.content AS commentContent, + c.`Comment`.creationDate AS commentCreationDate, + p.Person.id AS replyAuthorId, + p.Person.firstName AS replyAuthorFirstName, + p.Person.lastName AS replyAuthorLastName, + CASE r + WHEN null THEN false + ELSE true + END AS replyAuthorKnowsOriginalMessageAuthor + ORDER BY commentCreationDate DESC, replyAuthorId + """ + Then the result should be, in any order: + | commentId | commentContent | commentCreationDate | replyAuthorId | replyAuthorFirstName | replyAuthorLastName | replyAuthorKnowsOriginalMessageAuthor | diff --git a/tests/tck/steps/test_ldbc.py b/tests/tck/steps/test_ldbc.py new file mode 100644 index 00000000000..3a3a3c0e407 --- /dev/null +++ b/tests/tck/steps/test_ldbc.py @@ -0,0 +1,7 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. + +from pytest_bdd import scenarios + +scenarios('ldbc')