diff --git a/src/codec/RowReader.h b/src/codec/RowReader.h index 7ba61ff7633..91f09896a39 100644 --- a/src/codec/RowReader.h +++ b/src/codec/RowReader.h @@ -15,7 +15,7 @@ namespace nebula { /** - * This class decodes one row of data + * @brief This class decodes one row of data */ class RowReader { public: @@ -33,6 +33,9 @@ class RowReader { explicit Cell(const Iterator* iter) : iter_(iter) {} }; + /** + * @brief Helper class to iterate over all fields in a row + */ class Iterator final { friend class Cell; friend class RowReader; @@ -65,35 +68,91 @@ class RowReader { public: virtual ~RowReader() = default; + /** + * @brief Get the property value by property name + * + * @param prop Property name + * @return Value Property value + */ virtual Value getValueByName(const std::string& prop) const noexcept = 0; + + /** + * @brief Get the property value by index in schema + * + * @param index Index in Schema + * @return Value Property value + */ virtual Value getValueByIndex(const int64_t index) const noexcept = 0; + + /** + * @brief Get the timestamp in value + * + * @return int64_t UTC + */ virtual int64_t getTimestamp() const noexcept = 0; + /** + * @brief The row reader version, only 1 or 2 is legal + * + * @return int32_t Reader version + */ virtual int32_t readerVer() const noexcept = 0; - // Return the number of bytes used for the header info + /** + * @brief Return the number of bytes used for the header info + */ virtual size_t headerLen() const noexcept = 0; + /** + * @brief Iterator points to the first field + * + * @return Iterator + */ virtual Iterator begin() const noexcept { return Iterator(this, 0); } + /** + * @brief Iterator points to the last field + * + * @return const Iterator& + */ virtual const Iterator& end() const noexcept { return endIter_; } + /** + * @brief The schema version encoded in value + * + * @return SchemaVer Schema version + */ virtual SchemaVer schemaVer() const noexcept { return schema_->getVersion(); } + /** + * @brief Count of 
fields + * + * @return size_t + */ virtual size_t numFields() const noexcept { return schema_->getNumFields(); } + /** + * @brief Get the schema of row data + * + * @return const meta::SchemaProviderIf* + */ virtual const meta::SchemaProviderIf* getSchema() const { return schema_; } + /** + * @brief Get the raw value in kv engine + * + * @return const std::string + */ virtual const std::string getData() const { return data_.toString(); } @@ -104,6 +163,13 @@ class RowReader { RowReader() : endIter_(this) {} + /** + * @brief Reset the row reader with schema and data + * + * @param schema + * @param row + * @return Whether reset succeed + */ virtual bool resetImpl(meta::SchemaProviderIf const* schema, folly::StringPiece row) noexcept; private: diff --git a/src/codec/RowReaderV1.cpp b/src/codec/RowReaderV1.cpp index 5e7a28404bc..877e1e35cee 100644 --- a/src/codec/RowReaderV1.cpp +++ b/src/codec/RowReaderV1.cpp @@ -43,7 +43,7 @@ bool RowReaderV1::resetImpl(meta::SchemaProviderIf const* schema, folly::StringP return true; } else { // Invalid data - LOG(ERROR) << "Invalid row data: " << toHexStr(row); + LOG(WARNING) << "Invalid row data: " << toHexStr(row); return false; } } @@ -72,7 +72,7 @@ bool RowReaderV1::processHeader(folly::StringPiece row) { uint32_t numOffsets = (numFields >> 4); if (numBytesForOffset_ * numOffsets + verBytes + 1 > row.size()) { // Data is too short - LOG(ERROR) << "Row data is too short: " << toHexStr(row); + LOG(WARNING) << "Row data is too short: " << toHexStr(row); return false; } offsets_.resize(numFields + 1, -1); diff --git a/src/codec/RowReaderV2.cpp b/src/codec/RowReaderV2.cpp index 4c82b04fdc5..fc213cbf568 100644 --- a/src/codec/RowReaderV2.cpp +++ b/src/codec/RowReaderV2.cpp @@ -200,7 +200,7 @@ Value RowReaderV2::getValueByIndex(const int64_t index) const noexcept { // Parse a geography from the wkb, normalize it and then verify its validity. 
auto geogRet = Geography::fromWKB(wkb, true, true); if (!geogRet.ok()) { - LOG(ERROR) << "Geography::fromWKB failed: " << geogRet.status(); + LOG(WARNING) << "Geography::fromWKB failed: " << geogRet.status(); return Value::kNullBadData; // Is it ok to return Value::kNullBadData? } return std::move(geogRet).value(); diff --git a/src/codec/RowReaderWrapper.cpp b/src/codec/RowReaderWrapper.cpp index bda7e106a6a..87b8beeb22a 100644 --- a/src/codec/RowReaderWrapper.cpp +++ b/src/codec/RowReaderWrapper.cpp @@ -107,7 +107,7 @@ bool RowReaderWrapper::reset(meta::SchemaProviderIf const* schema, currReader_ = &readerV2_; return true; } else { - LOG(ERROR) << "Unsupported row reader version " << readerVer; + LOG(WARNING) << "Unsupported row reader version " << readerVer; currReader_ = nullptr; return false; } @@ -171,7 +171,7 @@ void RowReaderWrapper::getVersions(const folly::StringPiece& row, // presents verBytes = row[index++] & 0x07; } else { - LOG(ERROR) << "Invalid reader version: " << readerVer; + LOG(WARNING) << "Invalid reader version: " << readerVer; schemaVer = -1; return; } @@ -180,7 +180,7 @@ void RowReaderWrapper::getVersions(const folly::StringPiece& row, if (verBytes > 0) { if (verBytes + 1 > row.size()) { // Data is too short - LOG(ERROR) << "Row data is too short: " << toHexStr(row); + LOG(WARNING) << "Row data is too short: " << toHexStr(row); schemaVer = -1; return; } diff --git a/src/codec/RowReaderWrapper.h b/src/codec/RowReaderWrapper.h index acd3a11ad80..a619d6d3490 100644 --- a/src/codec/RowReaderWrapper.h +++ b/src/codec/RowReaderWrapper.h @@ -14,6 +14,9 @@ namespace nebula { +/** + * @brief A wrapper class to hide details of RowReaderV1 and RowReaderV2 + */ class RowReaderWrapper : public RowReader { FRIEND_TEST(RowReaderV1, headerInfo); FRIEND_TEST(RowReaderV1, encodedData); @@ -26,6 +29,11 @@ class RowReaderWrapper : public RowReader { RowReaderWrapper& operator=(const RowReaderWrapper&) = delete; + /** + * @brief Move constructor of row reader 
wrapper + * + * @param rhs + */ RowReaderWrapper(RowReaderWrapper&& rhs) { this->readerVer_ = rhs.readerVer_; if (this->readerVer_ == 1) { @@ -39,6 +47,12 @@ class RowReaderWrapper : public RowReader { } } + /** + * @brief Move assignment operator + * + * @param rhs + * @return RowReaderWrapper& + */ RowReaderWrapper& operator=(RowReaderWrapper&& rhs) { this->readerVer_ = rhs.readerVer_; if (this->readerVer_ == 1) { @@ -53,35 +67,97 @@ class RowReaderWrapper : public RowReader { return *this; } + /** + * @brief Generate a row reader wrapper of tag data + * + * @param schemaMan Schema manager, used to find the related schema of data + * @param space SpaceId + * @param tag TagId + * @param row Value in kv engine + * @return RowReaderWrapper The row reader wrapper + */ static RowReaderWrapper getTagPropReader(meta::SchemaManager* schemaMan, GraphSpaceID space, TagID tag, folly::StringPiece row); + /** + * @brief Generate a row reader wrapper of edge data + * + * @param schemaMan Schema manager, used to find the related schema of data + * @param space SpaceId + * @param edge EdgeType + * @param row Value in kv engine + * @return RowReaderWrapper The row reader wrapper + */ static RowReaderWrapper getEdgePropReader(meta::SchemaManager* schemaMan, GraphSpaceID space, EdgeType edge, folly::StringPiece row); + /** + * @brief Generate a row reader wrapper of data + * + * @param schema + * @param row + * @return RowReaderWrapper + */ static RowReaderWrapper getRowReader(meta::SchemaProviderIf const* schema, folly::StringPiece row); - // notice: the schemas are from oldest to newest, - // usually from getAllVerTagSchema or getAllVerEdgeSchema in SchemaMan + /** + * @brief Generate a row reader wrapper of data, the schemas are stored in vector. 
+ * notice: the schemas are from oldest to newest, + * usually from getAllVerTagSchema or getAllVerEdgeSchema in SchemaMan + * + * @param schemas + * @param row + * @return RowReaderWrapper + */ static RowReaderWrapper getRowReader( const std::vector>& schemas, folly::StringPiece row); + /** + * @brief Construct a new row reader wrapper + * + * @param schema + * @param row + * @param readerVer Row reader version + */ RowReaderWrapper(const meta::SchemaProviderIf* schema, const folly::StringPiece& row, int32_t& readerVer); + /** + * @brief Reset current row reader wrapper with the given schema, data and reader version + * + * @param schema + * @param row + * @param readVer + * @return Whether reset succeed + */ bool reset(meta::SchemaProviderIf const* schema, folly::StringPiece row, int32_t readVer) noexcept; + /** + * @brief Reset current row reader wrapper with the given schema and data + * + * @param schema + * @param row + * @return Whether reset succeed + */ bool reset(meta::SchemaProviderIf const* schema, folly::StringPiece row) noexcept; + /** + * @brief Reset current row reader wrapper with the given schemas and data, the schemas are stored in + * vector. 
+ * + * @param schemas + * @param row + * @return Whether reset succeed + */ bool reset(const std::vector>& schemas, folly::StringPiece row) noexcept; @@ -141,36 +217,67 @@ class RowReaderWrapper : public RowReader { return currReader_->getData(); } + /** + * @brief Get schema version and reader version by data + * + * @param row Row data + * @param schemaVer Schema version + * @param readerVer Row reader version + */ static void getVersions(const folly::StringPiece& row, SchemaVer& schemaVer, int32_t& readerVer); + /** + * @brief Return whether wrapper points to a valid data + */ operator bool() const noexcept { return operator!=(nullptr); } + /** + * @brief Return whether wrapper points to a valid data + */ bool operator==(std::nullptr_t) const noexcept { return !operator!=(nullptr); } + /** + * @brief Return whether wrapper points to a valid data + */ bool operator!=(std::nullptr_t) const noexcept { return currReader_ != nullptr; } + /** + * @brief Return this row reader wrapper + */ RowReaderWrapper* operator->() const noexcept { return get(); } + /** + * @brief Return this row reader wrapper + */ RowReaderWrapper* get() const noexcept { return const_cast(this); } + /** + * @brief Return this row reader wrapper + */ RowReaderWrapper* get() noexcept { return this; } + /** + * @brief Return this row reader wrapper + */ RowReaderWrapper& operator*() const noexcept { return *get(); } + /** + * @brief Reset to an empty row reader + */ void reset() noexcept { currReader_ = nullptr; } diff --git a/src/codec/RowWriterV2.h b/src/codec/RowWriterV2.h index acb0069c4ef..6b79c00a0ca 100644 --- a/src/codec/RowWriterV2.h +++ b/src/codec/RowWriterV2.h @@ -95,28 +95,60 @@ class RowWriterV2 { ~RowWriterV2() = default; - // Return the exact length of the encoded binary array + /** + * @brief Return the exact length of the encoded binary array + * + * @return int64_t + */ int64_t size() const noexcept { return buf_.size(); } + /** + * @brief Return the related schema + * + * 
@return const meta::SchemaProviderIf* + */ const meta::SchemaProviderIf* schema() const { return schema_; } + /** + * @brief Get the encoded string + * + * @return const std::string& + */ const std::string& getEncodedStr() const noexcept { CHECK(finished_) << "You need to call finish() first"; return buf_; } + /** + * @brief Get the encoded string with move + * + * @return std::string + */ std::string moveEncodedStr() noexcept { CHECK(finished_) << "You need to call finish() first"; return std::move(buf_); } + /** + * @brief Finish setting fields, begin to encode + * + * @return WriteResult Whether encode succeed + */ WriteResult finish() noexcept; // Data write + /** + * @brief Set property value by index + * + * @tparam T + * @param index Field index + * @param v Value to write + * @return WriteResult + */ template WriteResult set(size_t index, T&& v) noexcept { CHECK(!finished_) << "You have called finish()"; @@ -127,6 +159,14 @@ class RowWriterV2 { } // Data write + /** + * @brief Set property value by property name + * + * @tparam T + * @param name Property name + * @param v Value to write + * @return WriteResult + */ template WriteResult set(const std::string& name, T&& v) noexcept { CHECK(!finished_) << "You have called finish()"; @@ -138,10 +178,38 @@ class RowWriterV2 { } } + /** + * @brief Set the value by index + * + * @param index + * @param val + * @return WriteResult + */ WriteResult setValue(ssize_t index, const Value& val) noexcept; + + /** + * @brief Set the value by property name + * + * @param name + * @param val + * @return WriteResult + */ WriteResult setValue(const std::string& name, const Value& val) noexcept; + /** + * @brief Set null by index + * + * @param index + * @return WriteResult + */ WriteResult setNull(ssize_t index) noexcept; + + /** + * @brief Set null by property name + * + * @param name + * @return WriteResult + */ WriteResult setNull(const std::string& name) noexcept; private: diff --git a/src/codec/test/RowWriterV1-inl.h 
b/src/codec/test/RowWriterV1-inl.h index 5ccf36ebf12..fd876c00be9 100644 --- a/src/codec/test/RowWriterV1-inl.h +++ b/src/codec/test/RowWriterV1-inl.h @@ -22,7 +22,7 @@ typename std::enable_if::value, RowWriterV1&>::type RowWrite break; } default: { - LOG(ERROR) << "Incompatible value type \"int\""; + LOG(WARNING) << "Incompatible value type \"int\""; writeInt(0); break; } diff --git a/src/codec/test/RowWriterV1.cpp b/src/codec/test/RowWriterV1.cpp index 7448e827798..9906ef7d2f5 100644 --- a/src/codec/test/RowWriterV1.cpp +++ b/src/codec/test/RowWriterV1.cpp @@ -83,7 +83,7 @@ RowWriterV1& RowWriterV1::operator<<(bool v) noexcept { cord_ << v; break; default: - LOG(ERROR) << "Incompatible value type \"bool\""; + LOG(WARNING) << "Incompatible value type \"bool\""; // Output a default value cord_ << false; break; @@ -107,7 +107,7 @@ RowWriterV1& RowWriterV1::operator<<(float v) noexcept { cord_ << static_cast(v); break; default: - LOG(ERROR) << "Incompatible value type \"float\""; + LOG(WARNING) << "Incompatible value type \"float\""; cord_ << static_cast(0.0); break; } @@ -130,7 +130,7 @@ RowWriterV1& RowWriterV1::operator<<(double v) noexcept { cord_ << v; break; default: - LOG(ERROR) << "Incompatible value type \"double\""; + LOG(WARNING) << "Incompatible value type \"double\""; cord_ << static_cast(0.0); break; } @@ -160,7 +160,7 @@ RowWriterV1& RowWriterV1::operator<<(folly::StringPiece v) noexcept { break; } default: { - LOG(ERROR) << "Incompatible value type \"string\""; + LOG(WARNING) << "Incompatible value type \"string\""; writeInt(0); break; } @@ -182,7 +182,7 @@ RowWriterV1& RowWriterV1::operator<<(folly::StringPiece v) noexcept { ***************************/ RowWriterV1& RowWriterV1::operator<<(Skip&& skip) noexcept { if (skip.toSkip_ <= 0) { - VLOG(2) << "Nothing to skip"; + VLOG(4) << "Nothing to skip"; return *this; } diff --git a/src/common/utils/NebulaKeyUtils.cpp b/src/common/utils/NebulaKeyUtils.cpp index 8c81230ec48..ff1c60ba8bd 100644 --- 
a/src/common/utils/NebulaKeyUtils.cpp +++ b/src/common/utils/NebulaKeyUtils.cpp @@ -72,13 +72,14 @@ std::string NebulaKeyUtils::edgeKey(size_t vIdLen, .append(1, ev); return key; } + // static std::string NebulaKeyUtils::vertexKey(size_t vIdLen, PartitionID partId, const VertexID& vId, char pad) { CHECK_GE(vIdLen, vId.size()); - int32_t item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kVertex); + PartitionID item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kVertex); std::string key; key.reserve(kTagLen + vIdLen); key.append(reinterpret_cast(&item), sizeof(int32_t)) @@ -86,6 +87,16 @@ std::string NebulaKeyUtils::vertexKey(size_t vIdLen, .append(vIdLen - vId.size(), pad); return key; } + +// static +std::string NebulaKeyUtils::vertexPrefix(PartitionID partId) { + PartitionID item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kVertex); + std::string key; + key.reserve(sizeof(PartitionID)); + key.append(reinterpret_cast(&item), sizeof(PartitionID)); + return key; +} + // static std::string NebulaKeyUtils::systemCommitKey(PartitionID partId) { int32_t item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kSystem); diff --git a/src/common/utils/NebulaKeyUtils.h b/src/common/utils/NebulaKeyUtils.h index ce04c56dd1c..6d07383e4dd 100644 --- a/src/common/utils/NebulaKeyUtils.h +++ b/src/common/utils/NebulaKeyUtils.h @@ -65,10 +65,14 @@ class NebulaKeyUtils final { EdgeRanking rank, const VertexID& dstId, EdgeVerPlaceHolder ev = 1); + static std::string vertexKey(size_t vIdLen, PartitionID partId, const VertexID& vId, char pad = '\0'); + + static std::string vertexPrefix(PartitionID partId); + static std::string systemCommitKey(PartitionID partId); static std::string systemPartKey(PartitionID partId); diff --git a/src/kvstore/Common.h b/src/kvstore/Common.h index 1477573941f..ede3ae3cc69 100644 --- a/src/kvstore/Common.h +++ b/src/kvstore/Common.h @@ -17,15 +17,23 @@ namespace nebula { namespace kvstore { +/** + * 
@brief Wrapper of rocksdb compaction filter function + */ class KVFilter { public: KVFilter() = default; virtual ~KVFilter() = default; /** - * Remove the key in background compaction if return true, otherwise return - * false. - * */ + * @brief Whether to remove the key during compaction + * + * @param spaceId + * @param key + * @param val + * @return true Key will be removed + * @return false Key will not be removed + */ virtual bool filter(GraphSpaceID spaceId, const folly::StringPiece& key, const folly::StringPiece& val) const = 0; @@ -35,6 +43,9 @@ using KV = std::pair; using KVCallback = folly::Function; using NewLeaderCallback = folly::Function; +/** + * @brief folly::StringPiece to rocksdb::Slice + */ inline rocksdb::Slice toSlice(const folly::StringPiece& str) { return rocksdb::Slice(str.begin(), str.size()); } diff --git a/src/kvstore/CompactionFilter.h b/src/kvstore/CompactionFilter.h index 7fdc88731b3..bb2830f5282 100644 --- a/src/kvstore/CompactionFilter.h +++ b/src/kvstore/CompactionFilter.h @@ -17,16 +17,35 @@ DECLARE_int32(custom_filter_interval_secs); namespace nebula { namespace kvstore { +/** + * @brief CompactionFilter, built by CompactionFilterFactory + */ class KVCompactionFilter final : public rocksdb::CompactionFilter { public: + /** + * @brief Construct a new KVCompactionFilter object + * + * @param spaceId + * @param kvFilter A wrapper of filter function + */ KVCompactionFilter(GraphSpaceID spaceId, std::unique_ptr kvFilter) : spaceId_(spaceId), kvFilter_(std::move(kvFilter)) {} - bool Filter(int, + /** + * @brief Whether to remove the key during compaction + * + * @param level Levels of key in rocksdb, not used for now + * @param key Rocksdb key + * @param val Rocksdb val + * @return true Key will be removed + * @return false Key will not be removed + */ bool Filter(int level, const rocksdb::Slice& key, const rocksdb::Slice& val, std::string*, bool*) const override { + UNUSED(level); return kvFilter_->filter(spaceId_, 
folly::StringPiece(key.data(), key.size()), folly::StringPiece(val.data(), val.size())); @@ -41,12 +60,21 @@ class KVCompactionFilter final : public rocksdb::CompactionFilter { std::unique_ptr kvFilter_; }; +/** + * @brief CompactionFilterFactory, built by CompactionFilterFactoryBuilder + */ class KVCompactionFilterFactory : public rocksdb::CompactionFilterFactory { public: explicit KVCompactionFilterFactory(GraphSpaceID spaceId) : spaceId_(spaceId) {} virtual ~KVCompactionFilterFactory() = default; + /** + * @brief Create a Compaction Filter object, called by rocksdb when doing compaction + * + * @param context Information about compaction + * @return std::unique_ptr + */ std::unique_ptr CreateCompactionFilter( const rocksdb::CompactionFilter::Context& context) override { auto now = time::WallClock::fastNowInSec(); @@ -77,6 +105,10 @@ class KVCompactionFilterFactory : public rocksdb::CompactionFilterFactory { int32_t lastRunCustomFilterTimeSec_ = 0; }; +/** + * @brief CompactionFilterFactoryBuilder is a wrapper to build rocksdb CompactionFilterFactory, + * implemented by storage + */ class CompactionFilterFactoryBuilder { public: CompactionFilterFactoryBuilder() = default; diff --git a/src/kvstore/DiskManager.cpp b/src/kvstore/DiskManager.cpp index 69cc7954054..f7354ceb61b 100644 --- a/src/kvstore/DiskManager.cpp +++ b/src/kvstore/DiskManager.cpp @@ -131,7 +131,7 @@ void DiskManager::refresh() { boost::system::error_code ec; auto info = boost::filesystem::space(dataPaths_[i], ec); if (!ec) { - VLOG(1) << "Refresh filesystem info of " << dataPaths_[i]; + VLOG(2) << "Refresh filesystem info of " << dataPaths_[i]; freeBytes_[i] = info.available; } else { LOG(WARNING) << "Get filesystem info of " << dataPaths_[i] << " failed"; diff --git a/src/kvstore/DiskManager.h b/src/kvstore/DiskManager.h index b5706dda07a..641cef482b0 100644 --- a/src/kvstore/DiskManager.h +++ b/src/kvstore/DiskManager.h @@ -24,42 +24,85 @@ using PartDiskMap = std::unordered_map>; using 
SpaceDiskPartsMap = std::unordered_map>; +/** + * @brief Monitor remaining spaces of each disk + */ class DiskManager { FRIEND_TEST(DiskManagerTest, AvailableTest); FRIEND_TEST(DiskManagerTest, WalNoSpaceTest); public: + /** + * @brief Construct a new Disk Manager object + * + * @param dataPaths `data_path` in configuration + * @param bgThread Background thread to refresh remaining spaces of each data path + */ DiskManager(const std::vector& dataPaths, std::shared_ptr bgThread = nullptr); - // Canonical path of all which contains the specified space, - // e.g. {"/DataPath1/nebula/spaceId", "/DataPath2/nebula/spaceId" ... } + /** + * @brief Return canonical data path of given space + * + * @param spaceId + * @return StatusOr> Canonical path of all which contains the specified + * space, e.g. {"/DataPath1/nebula/spaceId", "/DataPath2/nebula/spaceId" ... } + */ StatusOr> path(GraphSpaceID spaceId); - // Canonical path which contains the specified space and part, e.g. - // "/DataPath/nebula/spaceId". As for one storage instance, at most one path - // should contain a partition. Note that there isn't a separate dir for a - // partition (except wal), so we return space dir + /** + * @brief Canonical path which contains the specified space and part, e.g. + * "/DataPath/nebula/spaceId". As for one storage instance, at most one path should contain a + * partition. 
Note that there isn't a separate dir for a partition (except wal), so we return + * space dir + * + * @param spaceId + * @param partId + * @return StatusOr data path of given partId if found, else return error status + */ StatusOr path(GraphSpaceID spaceId, PartitionID partId); - // pre-condition: path is the space path, so it must end with /nebula/spaceId - // and path exists + /** + * @brief Add a partition to a given path, called when adding a partition in NebulaStore + * @pre Path is the space path, so it must end with /nebula/spaceId and path must exist + * + * @param spaceId + * @param partId + * @param path + */ void addPartToPath(GraphSpaceID spaceId, PartitionID partId, const std::string& path); - // pre-condition: path is the space path, so it must end with /nebula/spaceId - // and path exists + /** + * @brief Remove a partition from a given path, called when removing a partition in NebulaStore + * @pre Path is the space path, so it must end with /nebula/spaceId and path must exist + * + * @param spaceId + * @param partId + * @param path + */ void removePartFromPath(GraphSpaceID spaceId, PartitionID partId, const std::string& path); + /** + * @brief Check if the data path of given partition has enough space + * + * @param spaceId + * @param partId + * @return true Data path remains enough space + * @return false Data path does not remain enough space + */ bool hasEnoughSpace(GraphSpaceID spaceId, PartitionID partId); - // Given a space, return data path and all partition in the path - StatusOr partDist(GraphSpaceID spaceId); - - // Get all space data path and all partition in the path + /** + * @brief Get all partitions grouped by data path and spaceId + * + * @param diskParts Get all space data path and all partition in the path + */ void getDiskParts(SpaceDiskPartsMap& diskParts); private: - // refresh free bytes of data path periodically + /** + * @brief Refresh free bytes of data path periodically + */ void refresh(); private: diff --git 
a/src/kvstore/EventListener.h b/src/kvstore/EventListener.h index b90f035a3bc..0076efcb881 100644 --- a/src/kvstore/EventListener.h +++ b/src/kvstore/EventListener.h @@ -12,8 +12,11 @@ namespace kvstore { class EventListener : public rocksdb::EventListener { public: - // A callback function to RocksDB which will be called before a RocksDB starts - // to compact. + /** + * @brief A callback function to RocksDB which will be called before a RocksDB starts to compact. + * + * @param info Compaction job information passed by rocksdb + */ void OnCompactionBegin(rocksdb::DB*, const rocksdb::CompactionJobInfo& info) override { LOG(INFO) << "Rocksdb start compaction column family: " << info.cf_name << " because of " << compactionReasonString(info.compaction_reason) @@ -22,8 +25,12 @@ class EventListener : public rocksdb::EventListener { << info.base_input_level << ", output level is " << info.output_level; } - // A callback function for RocksDB which will be called - // whenever a registered RocksDB compacts a file. + /** + * @brief A callback function for RocksDB which will be called whenever a registered RocksDB + * compacts a file. + * + * @param info Compaction job information passed by rocksdb + */ void OnCompactionCompleted(rocksdb::DB*, const rocksdb::CompactionJobInfo& info) override { LOG(INFO) << "Rocksdb compaction completed column family: " << info.cf_name << " because of " << compactionReasonString(info.compaction_reason) @@ -32,8 +39,12 @@ class EventListener : public rocksdb::EventListener { << info.base_input_level << ", output level is " << info.output_level; } - // A callback function to RocksDB which will be called - // before a RocksDB starts to flush memtables. + /** + * @brief A callback function to RocksDB which will be called before a RocksDB starts to flush + * memtables. 
+ * + * @param info Flush job information passed by rocksdb + */ void OnFlushBegin(rocksdb::DB*, const rocksdb::FlushJobInfo& info) override { VLOG(1) << "Rocksdb start flush column family: " << info.cf_name << " because of " << flushReasonString(info.flush_reason) @@ -43,8 +54,12 @@ class EventListener : public rocksdb::EventListener { << ", the properties of the table: " << info.table_properties.ToString(); } - // A callback function to RocksDB which will be called - // whenever a registered RocksDB flushes a file. + /** + * @brief A callback function to RocksDB which will be called whenever a registered RocksDB + * flushes a file. + * + * @param info Flush job information passed by rocksdb + */ void OnFlushCompleted(rocksdb::DB*, const rocksdb::FlushJobInfo& info) override { VLOG(1) << "Rocksdb flush completed column family: " << info.cf_name << " because of " << flushReasonString(info.flush_reason) << " the newly created file: " << info.file_path @@ -53,123 +68,199 @@ class EventListener : public rocksdb::EventListener { << " the properties of the table: " << info.table_properties.ToString(); } - // A callback function for RocksDB which will be called whenever a SST file is - // created. + /** + * @brief A callback function for RocksDB which will be called whenever a SST file is created. + * + * @param info Table file creation information passed by rocksdb + */ void OnTableFileCreated(const rocksdb::TableFileCreationInfo& info) override { VLOG(3) << "Rocksdb SST file created: the path is " << info.file_path << " the file size is " << info.file_size; } - // A callback function for RocksDB which will be called whenever a SST file is - // deleted. 
+ /** + * @brief A callback function for RocksDB which will be called whenever a SST file is deleted + * + * @param info Table file deletion information passed by rocksdb + */ void OnTableFileDeleted(const rocksdb::TableFileDeletionInfo& info) override { VLOG(3) << "Rocksdb SST file deleted: the path is " << info.file_path; } - // A callback function for RocksDB which will be called before a SST file is - // being created. + /** + * @brief A callback function for RocksDB which will be called before a SST file is being created. + * + * @param info Table file creation information passed by rocksdb + */ void OnTableFileCreationStarted(const rocksdb::TableFileCreationBriefInfo& info) override { VLOG(3) << " database's name is " << info.db_name << ", column family's name is " << info.cf_name << ", the created file is " << info.file_path << ", because of " << tableFileCreationReasonString(info.reason); } - // A callback function for RocksDB which will be called before - // a memtable is made immutable. + /** + * @brief A callback function for RocksDB which will be called before a memtable is made + * immutable. + * + * @param info MemTable informations passed by rocksdb + */ void OnMemTableSealed(const rocksdb::MemTableInfo& info) override { VLOG(3) << "MemTable Sealed column family: " << info.cf_name << ", the total number of entries: " << info.num_entries << ", the total number of deletes: " << info.num_deletes; } - // A callback function for RocksDB which will be called before - // a column family handle is deleted. - void OnColumnFamilyHandleDeletionStarted(rocksdb::ColumnFamilyHandle* /*handle*/) override {} + /** + * @brief A callback function for RocksDB which will be called before a column family handle is + * deleted. 
+ * + * @param handle Column family handle passed by rocksdb + */ + void OnColumnFamilyHandleDeletionStarted(rocksdb::ColumnFamilyHandle* handle) override { + UNUSED(handle); + } - // A callback function for RocksDB which will be called after an external - // file is ingested using IngestExternalFile. - void OnExternalFileIngested(rocksdb::DB*, + /** + * @brief A callback function for RocksDB which will be called after an external file is ingested + * using IngestExternalFile. + * + * @param db Rocksdb instance + * @param info Ingest information passed by rocksdb + */ + void OnExternalFileIngested(rocksdb::DB* db, const rocksdb::ExternalFileIngestionInfo& info) override { - LOG(INFO) << "Ingest external SST file: column family " << info.cf_name - << ", the external file path " << info.external_file_path - << ", the internal file path " << info.internal_file_path - << ", the properties of the table: " << info.table_properties.ToString(); + UNUSED(db); + VLOG(1) << "Ingest external SST file: column family " << info.cf_name + << ", the external file path " << info.external_file_path << ", the internal file path " + << info.internal_file_path + << ", the properties of the table: " << info.table_properties.ToString(); } - // A callback function for RocksDB which will be called before setting the - // background error status to a non-OK value. - void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status*) override { - LOG(INFO) << "BackgroundError: because of " << backgroundErrorReasonString(reason); + /** + * @brief A callback function for RocksDB which will be called before setting the background error + * status to a non-OK value, e.g. 
disk is corrupted during compaction + * + * @param reason Reason to start a background job + * @param status Detail status of the background job + */ void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status* status) override { LOG(INFO) << "BackgroundError: because of " << backgroundErrorReasonString(reason) << " " + << status->ToString(); } - // A callback function for RocksDB which will be called whenever a change - // of superversion triggers a change of the stall conditions. + /** + * @brief A callback function for RocksDB which will be called whenever a change of superversion + * triggers a change of the stall conditions. + * + * @param info Current and previous status of whether write is stalled + */ void OnStallConditionsChanged(const rocksdb::WriteStallInfo& info) override { LOG(INFO) << "Stall conditions changed column family: " << info.cf_name << ", current condition: " << writeStallConditionString(info.condition.cur) << ", previous condition: " << writeStallConditionString(info.condition.prev); } - // A callback function for RocksDB which will be called whenever a file read - // operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file read operation + * finishes. + * + * @param info Information when read a rocksdb file + */ void OnFileReadFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "Reading file finished: file path is " << info.path << " offset: " << info.offset + VLOG(4) << "Reading file finished: file path is " << info.path << " offset: " << info.offset << " length: " << info.length; } - // A callback function for RocksDB which will be called whenever a file write - // operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file write operation + * finishes. 
+ * + * @param info Information when write a rocksdb file + */ void OnFileWriteFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "Writeing file finished: file path is " << info.path << " offset: " << info.offset + VLOG(4) << "Writeing file finished: file path is " << info.path << " offset: " << info.offset << " length: " << info.length; } - // A callback function for RocksDB which will be called whenever a file flush - // operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file flush operation + * finishes. + * + * @param info Information when flush a rocksdb file + */ void OnFileFlushFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "Flushing file finished: file path is " << info.path << " offset: " << info.offset + VLOG(4) << "Flushing file finished: file path is " << info.path << " offset: " << info.offset << " length: " << info.length; } - // A callback function for RocksDB which will be called whenever a file sync - // operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file sync operation + * finishes. + * + * @param info Information when sync a rocksdb file + */ void OnFileSyncFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "Syncing file finished: file path is " << info.path << " offset: " << info.offset + VLOG(4) << "Syncing file finished: file path is " << info.path << " offset: " << info.offset << " length: " << info.length; } - // A callback function for RocksDB which will be called whenever a file - // rangeSync operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file rangeSync operation + * finishes. 
+ * + * @param info Information when range sync a rocksdb file + */ void OnFileRangeSyncFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "RangeSyncing file finished: file path is " << info.path + VLOG(4) << "RangeSyncing file finished: file path is " << info.path << " offset: " << info.offset << " length: " << info.length; } - // A callback function for RocksDB which will be called whenever a file - // truncate operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file truncate operation + * finishes. + * + * @param info Information when truncate a rocksdb file + */ void OnFileTruncateFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "Truncating file finished: file path is " << info.path << " offset: " << info.offset + VLOG(4) << "Truncating file finished: file path is " << info.path << " offset: " << info.offset << " length: " << info.length; } - // A callback function for RocksDB which will be called whenever a file close - // operation finishes. + /** + * @brief A callback function for RocksDB which will be called whenever a file close operation + * finishes. 
+ * + * @param info Information when close a rocksdb file + */ void OnFileCloseFinish(const rocksdb::FileOperationInfo& info) override { - VLOG(3) << "Closing file finished: file path is " << info.path; + VLOG(4) << "Closing file finished: file path is " << info.path; } - // A callback function for RocksDB which will be called just before - // starting the automatic recovery process for recoverable background errors。 + /** + * @brief A callback function for RocksDB which will be called just before starting the automatic + * recovery process for recoverable background errors。 + * + * @param reason Reason to start a background job + * @param status Background job status + * @param autoRecovery Whether is recovered automatically + */ void OnErrorRecoveryBegin(rocksdb::BackgroundErrorReason reason, - rocksdb::Status /* bg_error */, - bool* /* auto_recovery */) override { - LOG(INFO) << "Error recovery begin: because of " << backgroundErrorReasonString(reason); + rocksdb::Status status, + bool* autoRecovery) override { + UNUSED(autoRecovery); + LOG(INFO) << "Error recovery begin: because of " << backgroundErrorReasonString(reason) + << ", previously failed because of " << status.ToString(); } - // A callback function for RocksDB which will be called once the database - // is recovered from read-only mode after an error. - void OnErrorRecoveryCompleted(rocksdb::Status) override { - LOG(INFO) << "Error Recovery Completed"; + /** + * @brief A callback function for RocksDB which will be called once the database is recovered from + * read-only mode after an error. 
+ * + * @param oldBgStatus Previous background job status + */ + void OnErrorRecoveryCompleted(rocksdb::Status oldBgStatus) override { + LOG(INFO) << "Error Recovery Completed, previously failed because of " + << oldBgStatus.ToString(); } private: diff --git a/src/kvstore/KVEngine.h b/src/kvstore/KVEngine.h index dd9364a3de2..71902c12b89 100644 --- a/src/kvstore/KVEngine.h +++ b/src/kvstore/KVEngine.h @@ -15,18 +15,43 @@ namespace nebula { namespace kvstore { +/** + * @brief wrapper of batch write + */ class WriteBatch { public: virtual ~WriteBatch() = default; + /** + * @brief Encode the operation of put key/value into write batch + * + * @param key Key to put + * @param value Value to put + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode put(folly::StringPiece key, folly::StringPiece value) = 0; + /** + * @brief Encode the operation of remove key into write batch + * + * @param key Key to remove + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode remove(folly::StringPiece key) = 0; - // Remove all keys in the range [start, end) + /** + * @brief Encode the operation of remove keys in range [start, end) into write batch + * + * @param start Start key to be removed, inclusive + * @param end End key to be removed, exclusive + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode removeRange(folly::StringPiece start, folly::StringPiece end) = 0; }; +/** + * @brief Key-value engine object + */ class KVEngine { public: explicit KVEngine(GraphSpaceID spaceId) : spaceId_(spaceId) {} @@ -35,15 +60,37 @@ class KVEngine { virtual void stop() = 0; - // Retrieve the root path for the data - // If the store is persistent, a valid path will be returned - // Otherwise, nullptr will be returned + /** + * @brief Retrieve the data path of kv engine + * + * @return const char* Data path of kv engine + */ virtual const char* getDataRoot() const = 0; + /** + * @brief Retrieve the wal path of kv engine + * + * @return const 
char* Wal path of kv engine + */ virtual const char* getWalRoot() const = 0; + /** + * @brief return a WriteBatch object to do batch operation + * + * @return std::unique_ptr + */ virtual std::unique_ptr startBatchWrite() = 0; + /** + * @brief write the batch operation into kv engine + * + * @param batch WriteBatch object + * @param disableWAL Whether wal is disabled, only used in rocksdb + * @param sync Whether need to sync when write, only used in rocksdb + * @param wait Whether wait until write result, rocksdb would return incompelete if wait is false + * in certain scenario + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode commitBatchWrite(std::unique_ptr batch, bool disableWAL, bool sync, @@ -55,21 +102,42 @@ class KVEngine { * @return const void* snapshot pointer. */ virtual const void* GetSnapshot() = 0; + /** * @brief Release snapshot from kv engine. * * @param snapshot */ virtual void ReleaseSnapshot(const void* snapshot) = 0; - // Read a single key + + /** + * @brief Read a single key + * + * @param key Key to read + * @param value Pointer of value + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode get(const std::string& key, std::string* value) = 0; - // Read a list of keys, if key[i] does not exist, the i-th value in return - // value would be Status::KeyNotFound + /** + * @brief Read a list of keys + * + * @param keys Keys to read + * @param values Pointers of value + * @return std::vector Result status of each key, if key[i] does not exist, the i-th value + * in return value would be Status::KeyNotFound + */ virtual std::vector multiGet(const std::vector& keys, std::vector* values) = 0; - // Get all results in range [start, end) + /** + * @brief Get all results in range [start, end) + * + * @param start Start key, inclusive + * @param end End key, exclusive + * @param iter Iterator in range [start, end), returns by kv engine + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode 
range(const std::string& start, const std::string& end, std::unique_ptr* iter) = 0; @@ -77,77 +145,186 @@ class KVEngine { /** * @brief Get all results with 'prefix' str as prefix. * - * @param prefix Prefix string. + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix', returns by kv engine * @param snapshot Snapshot from kv engine. nullptr means no snapshot. - * @param iter Iterator for this prefix range. * @return nebula::cpp2::ErrorCode */ virtual nebula::cpp2::ErrorCode prefix(const std::string& prefix, std::unique_ptr* iter, const void* snapshot = nullptr) = 0; - // Get all results with 'prefix' str as prefix starting form 'start' + /** + * @brief Get all results with 'prefix' str as prefix starting form 'start' + * + * @param start Start key, inclusive + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' beginning from 'start', returns by kv engine + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode rangeWithPrefix(const std::string& start, const std::string& prefix, std::unique_ptr* iter) = 0; + /** + * @brief Scan all keys in kv engine + * + * @param storageIter Iterator returns by kv engine + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode scan(std::unique_ptr* storageIter) = 0; - // Write a single record + /** + * @brief Write a single record + * + * @param key Key to write + * @param value Value to write + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode put(std::string key, std::string value) = 0; - // Write a batch of records + /** + * @brief Write a batch of records + * + * @param keyValues Key-value pairs to write + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode multiPut(std::vector keyValues) = 0; - // Remove a single key + /** + * @brief Remove a single key + * + * @param key Key to remove + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode 
remove(const std::string& key) = 0; - // Remove a batch of keys + /** + * @brief Remove a batch of keys + * + * @param keys Keys to remove + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode multiRemove(std::vector keys) = 0; - // Remove range [start, end) + /** + * @brief Remove key in range [start, end) + * + * @param start Start key, inclusive + * @param end End key, exclusive + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode removeRange(const std::string& start, const std::string& end) = 0; - // Add partId into current storage engine. + /** + * @brief Add a partition to kv engine + * + * @param partId Partition id to add + */ virtual void addPart(PartitionID partId) = 0; - // Remove partId from current storage engine. + /** + * @brief Remove a partition from kv engine + * + * @param partId Partition id to add + */ virtual void removePart(PartitionID partId) = 0; - // Return all partIds current storage engine holds. + /** + * @brief Return all partIds in kv engine + * + * @return std::vector Partition ids + */ virtual std::vector allParts() = 0; - // Return total parts num + /** + * @brief Return total parts num + * + * @return int32_t Count of partition num + */ virtual int32_t totalPartsNum() = 0; - // Ingest sst files + /** + * @brief Ingest external sst files, only used in rocksdb + * + * @param files SST file path + * @param verifyFileChecksum Whether verify sst check-sum during ingestion + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode ingest(const std::vector& files, bool verifyFileChecksum = false) = 0; - // Set Config Option + /** + * @brief Set config option, only used in rocksdb + * + * @param configKey Config name + * @param configValue Config value + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode setOption(const std::string& configKey, const std::string& configValue) = 0; - // Set DB Config Option + /** + * @brief Set DB config option, only used in rocksdb + 
* + * @param configKey Config name + * @param configValue Config value + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode setDBOption(const std::string& configKey, const std::string& configValue) = 0; - // Get DB Property + /** + * @brief Get engine property, only used in rocksdb + * + * @param property Config name + * @return ErrorOr + */ virtual ErrorOr getProperty( const std::string& property) = 0; + /** + * @brief Do data compation in lsm tree + * + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode compact() = 0; + /** + * @brief Flush data in memtable into sst + * + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode flush() = 0; + /** + * @brief Create a rocksdb check point + * + * @param checkpointPath + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode createCheckpoint(const std::string& checkpointPath) = 0; // For meta + /** + * @brief Backup the data of a table prefix, for meta backup + * + * @param path KV engine path + * @param tablePrefix Table prefix + * @param filter Data filter when iterate the table + * @return ErrorOr> Return the sst file path if + * succeed, else return ErrorCode + */ virtual ErrorOr backupTable( const std::string& path, const std::string& tablePrefix, std::function filter) = 0; + /** + * @brief Call rocksdb backup, mainly for rocksdb PlainTable mounted on tmpfs/ramfs + * + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode backup() = 0; protected: diff --git a/src/kvstore/KVIterator.h b/src/kvstore/KVIterator.h index c2738e98db9..82b284bad6b 100644 --- a/src/kvstore/KVIterator.h +++ b/src/kvstore/KVIterator.h @@ -15,14 +15,33 @@ class KVIterator { public: virtual ~KVIterator() = default; + /** + * @brief Return whether iterator has more key/value + */ virtual bool valid() const = 0; + /** + * @brief Move to next key/value, undefined behaviour when valid is false + */ virtual void next() = 0; + /** + * @brief Move to previous 
key/value + */ virtual void prev() = 0; + /** + * @brief Return the key of iterator points to + * + * @return folly::StringPiece Key + */ virtual folly::StringPiece key() const = 0; + /** + * @brief Return the value of iterator points to + * + * @return folly::StringPiece Value + */ virtual folly::StringPiece val() const = 0; }; diff --git a/src/kvstore/KVStore.h b/src/kvstore/KVStore.h index 0a21920979a..87c3e6dcf39 100644 --- a/src/kvstore/KVStore.h +++ b/src/kvstore/KVStore.h @@ -58,22 +58,41 @@ struct StoreCapability { class Part; /** - * Interface for all kv-stores - **/ + * @brief Interfaces of kvstore + */ class KVStore { public: virtual ~KVStore() = default; - // Return bit-OR of StoreCapability values; + /** + * @brief Return bit-OR of StoreCapability values; + * + * @return uint32_t Bitwise capability + */ virtual uint32_t capability() const = 0; + /** + * @brief Stop the kvstore + */ virtual void stop() = 0; - // Retrieve the current leader for the given partition. This - // is usually called when E_LEADER_CHANGED error code is returned. + /** + * @brief Retrieve the current leader for the given partition. This is usually called when + * E_LEADER_CHANGED error code is returned. + * + * @param spaceId + * @param partID + * @return ErrorOr Return HostAddr when succeeded, return + * ErrorCode when failed + */ virtual ErrorOr partLeader(GraphSpaceID spaceId, PartitionID partID) = 0; + /** + * @brief Return pointer of part manager + * + * @return PartManager* + */ virtual PartManager* partManager() const { return nullptr; } @@ -83,7 +102,7 @@ class KVStore { * * @param spaceId Space id * @param partID Partition id - * @param canReadFromFollower Flag can read from follower. + * @param canReadFromFollower * @return const void* Snapshot. 
*/ virtual const void* GetSnapshot(GraphSpaceID spaceId, @@ -98,16 +117,34 @@ class KVStore { */ virtual void ReleaseSnapshot(GraphSpaceID spaceId, PartitionID partId, const void* snapshot) = 0; - // Read a single key + /** + * @brief Read a single key + * + * @param spaceId + * @param partId + * @param key + * @param value + * @param canReadFromFollower + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode get(GraphSpaceID spaceId, PartitionID partId, const std::string& key, std::string* value, bool canReadFromFollower = false) = 0; - // Read multiple keys, if error occurs a cpp2::ErrorCode is returned, - // If key[i] does not exist, the i-th value in return value would be - // Status::KeyNotFound + /** + * @brief Read a list of keys + * + * @param spaceId + * @param partId + * @param keys Keys to read + * @param values Pointers of value + * @param canReadFromFollower + * @return Return std::vector when suceeded: Result status of each key, if key[i] does not + * exist, the i-th value in return value would be Status::KeyNotFound. Return ErrorCode when + * failed + */ virtual std::pair> multiGet( GraphSpaceID spaceId, PartitionID partId, @@ -115,7 +152,17 @@ class KVStore { std::vector* values, bool canReadFromFollower = false) = 0; - // Get all results in range [start, end) + /** + * @brief Get all results in range [start, end) + * + * @param spaceId + * @param partId + * @param start Start key, inclusive + * @param end End key, exclusive + * @param iter Iterator in range [start, end), returns by kv engine + * @param canReadFromFollower + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode range(GraphSpaceID spaceId, PartitionID partId, const std::string& start, @@ -123,9 +170,9 @@ class KVStore { std::unique_ptr* iter, bool canReadFromFollower = false) = 0; - // Since the `range' interface will hold references to its 3rd & 4th - // parameter, in `iter', thus the arguments must outlive `iter'. 
Here we - // forbid one to invoke `range' with rvalues, which is the common mistake. + /** + * @brief To forbid to pass rvalue via the 'range' parameter. + */ virtual nebula::cpp2::ErrorCode range(GraphSpaceID spaceId, PartitionID partId, std::string&& start, @@ -134,12 +181,12 @@ class KVStore { bool canReadFromFollower = false) = delete; /** - * @brief Get all results with prefix. + * @brief Get all results with 'prefix' str as prefix. * * @param spaceId * @param partId - * @param prefix - * @param iter + * @param prefix Key of prefix to seek + * @param iter Iterator of keys starts with 'prefix', returns by kv engine * @param canReadFromFollower * @param snapshot If set, read from snapshot. * @return nebula::cpp2::ErrorCode @@ -152,15 +199,7 @@ class KVStore { const void* snapshot = nullptr) = 0; /** - * @brief To forbid to pass rvalue via the `prefix' parameter. - * - * @param spaceId - * @param partId - * @param prefix - * @param iter - * @param canReadFromFollower - * @param snapshot - * @return nebula::cpp2::ErrorCode + * @brief To forbid to pass rvalue via the 'prefix' parameter. 
*/ virtual nebula::cpp2::ErrorCode prefix(GraphSpaceID spaceId, PartitionID partId, @@ -169,7 +208,17 @@ class KVStore { bool canReadFromFollower = false, const void* snapshot = nullptr) = delete; - // Get all results with prefix starting from start + /** + * @brief Get all results with 'prefix' str as prefix starting form 'start' + * + * @param spaceId + * @param partId + * @param start Start key, inclusive + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' beginning from 'start', returns by kv engine + * @param canReadFromFollower + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode rangeWithPrefix(GraphSpaceID spaceId, PartitionID partId, const std::string& start, @@ -177,7 +226,9 @@ class KVStore { std::unique_ptr* iter, bool canReadFromFollower = false) = 0; - // To forbid to pass rvalue via the `rangeWithPrefix' parameter. + /** + * @brief To forbid to pass rvalue via the 'rangeWithPrefix' parameter. + */ virtual nebula::cpp2::ErrorCode rangeWithPrefix(GraphSpaceID spaceId, PartitionID partId, std::string&& start, @@ -185,80 +236,221 @@ class KVStore { std::unique_ptr* iter, bool canReadFromFollower = false) = delete; + /** + * @brief Synchronize the kvstore across multiple replica + * + * @param spaceId + * @param partId + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode sync(GraphSpaceID spaceId, PartitionID partId) = 0; + /** + * @brief Write multiple key/values to kvstore asynchronously + * + * @param spaceId + * @param partId + * @param keyValues Key/values to put + * @param cb Callback when has a result + */ virtual void asyncMultiPut(GraphSpaceID spaceId, PartitionID partId, std::vector&& keyValues, KVCallback cb) = 0; - // Asynchronous version of remove methods + /** + * @brief Remove a key from kvstore asynchronously + * + * @param spaceId + * @param partId + * @param key Key to remove + * @param cb Callback when has a result + */ virtual void 
asyncRemove(GraphSpaceID spaceId, PartitionID partId, const std::string& key, KVCallback cb) = 0; + /** + * @brief Remove multible keys from kvstore asynchronously + * + * @param spaceId + * @param partId + * @param key Keys to remove + * @param cb Callback when has a result + */ virtual void asyncMultiRemove(GraphSpaceID spaceId, PartitionID partId, std::vector&& keys, KVCallback cb) = 0; + /** + * @brief Remove keys in range [start, end) asynchronously + * + * @param spaceId + * @param partId + * @param start Start key + * @param end End key + * @param cb Callback when has a result + */ virtual void asyncRemoveRange(GraphSpaceID spaceId, PartitionID partId, const std::string& start, const std::string& end, KVCallback cb) = 0; + /** + * @brief Do some atomic operation on kvstore + * + * @param spaceId + * @param partId + * @param op Atomic operation + * @param cb Callback when has a result + */ virtual void asyncAtomicOp(GraphSpaceID spaceId, PartitionID partId, raftex::AtomicOp op, KVCallback cb) = 0; /** - * @brief async commit multi operation. 
- * difference between asyncMultiPut or asyncMultiRemove is - * this func allow contains both put and remove together - * difference between asyncAtomicOp is asyncAtomicOp may have CAS + * @brief Brief async commit multi operation, difference between asyncMultiPut or asyncMultiRemove + * is this func allow contains both put and remove together, difference between asyncAtomicOp is + * asyncAtomicOp may have CAS + * + * @param spaceId + * @param partId + * @param batch Encoded write batch + * @param cb Callback when has a result */ virtual void asyncAppendBatch(GraphSpaceID spaceId, PartitionID partId, std::string&& batch, KVCallback cb) = 0; + /** + * @brief Ingest the sst file under download directory + * + * @param spaceId + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode ingest(GraphSpaceID spaceId) = 0; + /** + * @brief Retrive the leader distribution + * + * @param leaderIds The leader address of all partitions + * @return int32_t The leader count of all spaces + */ virtual int32_t allLeader( std::unordered_map>& leaderIds) = 0; + /** + * @brief Get the part object of given spaceId and partId + * + * @param spaceId + * @param partId + * @return ErrorOr> Return the part if succeeed, + * else return ErrorCode + */ virtual ErrorOr> part(GraphSpaceID spaceId, PartitionID partId) = 0; + /** + * @brief Trigger comapction, only used in rocksdb + * + * @param spaceId + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode compact(GraphSpaceID spaceId) = 0; + /** + * @brief Trigger flush, only used in rocksdb + * + * @param spaceId + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode flush(GraphSpaceID spaceId) = 0; + /** + * @brief Create a Checkpoint, only used in rocksdb + * + * @param spaceId + * @param name Checkpoint name + * @return ErrorOr> Return the + * checkpoint info if succeed, else return ErrorCode + */ virtual ErrorOr> createCheckpoint( GraphSpaceID spaceId, const std::string& name) = 0; + /** + 
* @brief Drop a Checkpoint, only used in rocksdb + * + * @param spaceId + * @param name Checkpoint name + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode dropCheckpoint(GraphSpaceID spaceId, const std::string& name) = 0; + /** + * @brief Set the write blocking flag + * + * @param spaceId + * @param sign True to block. Falst to unblock + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode setWriteBlocking(GraphSpaceID spaceId, bool sign) = 0; + /** + * @brief Backup the data of a table prefix, for meta backup + * + * @param spaceId + * @param name + * @param tablePrefix Table prefix + * @param filter Data filter when iterate the table + * @return ErrorOr> Return the sst file path if + * succeed, else return ErrorCode + */ virtual ErrorOr> backupTable( GraphSpaceID spaceId, const std::string& name, const std::string& tablePrefix, std::function filter) = 0; - // for meta BR + /** + * @brief Restore from sst files + * + * @param spaceId + * @param files SST file path + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode restoreFromFiles(GraphSpaceID spaceId, const std::vector& files) = 0; + /** + * @brief Write data to local storage engine only + * + * @param spaceId + * @param keyValues Key/values to write into only local storage engine instead of multiple replica + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode multiPutWithoutReplicator(GraphSpaceID spaceId, std::vector keyValues) = 0; + /** + * @brief Get the data paths + * + * @return std::vector Data paths + */ virtual std::vector getDataRoot() const = 0; + /** + * @brief Get the kvstore propery, only used in rocksdb + * + * @param spaceId + * @param property Property name + * @return ErrorOr Return the property value in string if + * succeed, else return ErrorCode + */ virtual ErrorOr getProperty( GraphSpaceID spaceId, const std::string& property) = 0; diff --git a/src/kvstore/Listener.cpp b/src/kvstore/Listener.cpp index 
82a6e4e2a76..17f5c5a7602 100644 --- a/src/kvstore/Listener.cpp +++ b/src/kvstore/Listener.cpp @@ -59,7 +59,7 @@ void Listener::start(std::vector&& peers, bool) { committedLogTerm_ = logIdAndTerm.second; if (lastLogId_ < committedLogId_) { - LOG(INFO) << idStr_ << "Reset lastLogId " << lastLogId_ << " to be the committedLogId " + LOG(INFO) << idStr_ << "Listener reset lastLogId " << lastLogId_ << " to be the committedLogId " << committedLogId_; lastLogId_ = committedLogId_; lastLogTerm_ = committedLogTerm_; @@ -209,7 +209,8 @@ void Listener::doApply() { break; } default: { - LOG(WARNING) << idStr_ << "Unknown operation: " << static_cast(log[0]); + VLOG(2) << idStr_ + << "Should not reach here. Unknown operation: " << static_cast(log[0]); } } @@ -224,14 +225,8 @@ void Listener::doApply() { std::lock_guard guard(raftLock_); lastApplyLogId_ = lastApplyId; persist(committedLogId_, term_, lastApplyLogId_); - VLOG(1) << idStr_ << "Listener succeeded apply log to " << lastApplyLogId_; + VLOG(2) << idStr_ << "Listener succeeded apply log to " << lastApplyLogId_; lastApplyTime_ = time::WallClock::fastNowInMilliSec(); - VLOG(1) << folly::sformat( - "Commit snapshot to : committedLogId={}," - "committedLogTerm={}, lastApplyLogId_={}", - committedLogId_, - term_, - lastApplyLogId_); } }); } @@ -240,7 +235,7 @@ std::pair Listener::commitSnapshot(const std::vector data; @@ -252,7 +247,7 @@ std::pair Listener::commitSnapshot(const std::vector Listener::commitSnapshot(const std::vector Listener::commitSnapshot(const std::vector g(raftLock_); reset(); - VLOG(1) << folly::sformat( + LOG(INFO) << folly::sformat( "The listener has been reset : leaderCommitId={}," "lastLogTerm={}, term={}," "lastApplyLogId={}", diff --git a/src/kvstore/Listener.h b/src/kvstore/Listener.h index 8ebdb707870..cd423bfac8b 100644 --- a/src/kvstore/Listener.h +++ b/src/kvstore/Listener.h @@ -89,6 +89,21 @@ using RaftClient = thrift::ThriftClientManager diskMan, meta::SchemaManager* schemaMan); - // Initialize 
listener, all Listener must call this method + /** + * @brief Initialize listener, all Listener must call this method + * + * @param peers Raft peers + * @param asLearner Listener is always a raft learner, so by default true + */ void start(std::vector&& peers, bool asLearner = true) override; - // Stop listener + /** + * @brief Stop listener + */ void stop() override; + /** + * @brief Get listener's apply id + * + * @return LogID logId that has been applied to state machine + */ LogID getApplyId() { return lastApplyLogId_; } + /** + * @brief clean up data in listener, called in RaftPart::reset + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode cleanup() override { CHECK(!raftLock_.try_lock()); leaderCommitId_ = 0; @@ -119,65 +151,139 @@ class Listener : public raftex::RaftPart { return nebula::cpp2::ErrorCode::SUCCEEDED; } + /** + * @brief Trigger RaftPart::reset, clean all data and reset states + */ void resetListener(); + /** + * @brief Check whether listener has catchup leader + */ bool pursueLeaderDone(); protected: + /** + * @brief extra initialize work could do here + */ virtual void init() = 0; - // Last apply id, need to be persisted, used in initialization + /** + * @brief Get last apply id from persistance storage, used in initialization + * + * @return LogID Last apply log id + */ virtual LogID lastApplyLogId() = 0; + /** + * @brief Apply data into listener's state machine + * + * @param data Key/value to apply + * @return True if succeed. False if failed. 
+ */ virtual bool apply(const std::vector& data) = 0; - virtual bool persist(LogID, TermID, LogID) = 0; + /** + * @brief Persist commitLogId commitLogTerm and lastApplyLogId + */ + virtual bool persist(LogID commitLogId, TermID commitLogTerm, LogID lastApplyLogId) = 0; - void onLostLeadership(TermID) override { + /** + * @brief Callback when a raft node lost leadership on term, should not happen in listener + * + * @param term + */ + void onLostLeadership(TermID term) override { + UNUSED(term); LOG(FATAL) << "Should not reach here"; } - void onElected(TermID) override { + /** + * @brief Callback when a raft node elected as leader on term, should not happen in listener + * + * @param term + */ + void onElected(TermID term) override { + UNUSED(term); LOG(FATAL) << "Should not reach here"; } - void onLeaderReady(TermID) override { + /** + * @brief Callback when a raft node is ready to serve as leader, should not happen in listener + * + * @param term + */ + void onLeaderReady(TermID term) override { + UNUSED(term); LOG(FATAL) << "Should not reach here"; } + /** + * @brief Callback when a raft node discover new leader + * + * @param nLeader New leader's address + */ void onDiscoverNewLeader(HostAddr nLeader) override { - LOG(INFO) << idStr_ << "Find the new leader " << nLeader; + VLOG(2) << idStr_ << "Find the new leader " << nLeader; } + /** + * @brief Check if candidate is in my peer + * + * @param candidate Address when received a request + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode checkPeer(const HostAddr& candidate) override { CHECK(!raftLock_.try_lock()); if (peers_.find(candidate) == peers_.end()) { - LOG(WARNING) << idStr_ << "The candidate " << candidate << " is not in my peers"; + VLOG(2) << idStr_ << "The candidate " << candidate << " is not in my peers"; return nebula::cpp2::ErrorCode::E_RAFT_INVALID_PEER; } return nebula::cpp2::ErrorCode::SUCCEEDED; } - // For listener, we just return true directly. 
Another background thread trigger the actual - // apply work, and do it in worker thread, and update lastApplyLogId_ + /** + * @brief For listener, we just return true directly. Another background thread trigger the actual + * apply work, and do it in worker thread, and update lastApplyLogId_ + * + * @return std::tuple + */ std::tuple commitLogs(std::unique_ptr, bool) override; - // For most of the listeners, just return true is enough. However, if listener need to be aware - // of membership change, some log type of wal need to be pre-processed, could do it here. + /** + * @brief For most of the listeners, just return true is enough. However, if listener need to be + * aware of membership change, some log type of wal need to be pre-processed, could do it here. + * + * @param logId Log id to pre-process + * @param termId Log term to pre-process + * @param clusterId Cluster id in wal + * @param log Log message in wal + * @return True if succeed. False if failed. + */ bool preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const std::string& log) override; - // If the listener falls behind way to much than leader, the leader will send all its data - // in snapshot by batch, listener need to implement it if it need handle this case. The return - // value is a pair of of this batch. + /** + * @brief If the listener falls behind way to much than leader, the leader will send all its data + * in snapshot by batch, listener need to implement this method to apply the batch to state + * machine. The return value is a pair of of this batch. 
+ * + * @param data Data to apply + * @param committedLogId Commit log id of snapshot + * @param committedLogTerm Commit log term of snapshot + * @param finished Whether snapshot is finished + * @return std::pair Return count and size of the data + */ std::pair commitSnapshot(const std::vector& data, LogID committedLogId, TermID committedLogTerm, bool finished) override; + /** + * @brief Background job thread will trigger doApply to apply data into state machine periodically + */ void doApply(); protected: diff --git a/src/kvstore/ListenerFactory.h b/src/kvstore/ListenerFactory.h index 1e935a7ffb9..95af6dad1c2 100644 --- a/src/kvstore/ListenerFactory.h +++ b/src/kvstore/ListenerFactory.h @@ -12,9 +12,19 @@ namespace nebula { namespace kvstore { +/** + * @brief Factory to build listener + */ class ListenerFactory { public: template + /** + * @brief Create a Listener object + * + * @param type Type of listener + * @param args Other parameters + * @return std::shared_ptr + */ static std::shared_ptr createListener(meta::cpp2::ListenerType type, Args&&... 
args) { if (type == meta::cpp2::ListenerType::ELASTICSEARCH) { return std::make_shared(std::forward(args)...); diff --git a/src/kvstore/LogEncoder.h b/src/kvstore/LogEncoder.h index c4d4ba3d427..11bac4305eb 100644 --- a/src/kvstore/LogEncoder.h +++ b/src/kvstore/LogEncoder.h @@ -13,11 +13,11 @@ namespace nebula { namespace kvstore { enum LogType : char { - OP_PUT = 0x1, - OP_MULTI_PUT = 0x2, - OP_REMOVE = 0x3, - OP_MULTI_REMOVE = 0x4, - OP_REMOVE_RANGE = 0x6, + OP_PUT = 0x01, + OP_MULTI_PUT = 0x02, + OP_REMOVE = 0x03, + OP_MULTI_REMOVE = 0x04, + OP_REMOVE_RANGE = 0x06, OP_ADD_LEARNER = 0x07, OP_TRANS_LEADER = 0x08, OP_ADD_PEER = 0x09, @@ -26,39 +26,135 @@ enum LogType : char { }; enum BatchLogType : char { - OP_BATCH_PUT = 0x1, - OP_BATCH_REMOVE = 0x2, - OP_BATCH_REMOVE_RANGE = 0x3, + OP_BATCH_PUT = 0x01, + OP_BATCH_REMOVE = 0x02, + OP_BATCH_REMOVE_RANGE = 0x03, }; +/** + * @brief Encode key value into a string + * + * @param key + * @param val + * @return std::string Encoded string + */ std::string encodeKV(const folly::StringPiece& key, const folly::StringPiece& val); +/** + * @brief Decode key/value from string + * + * @param data Encoded string + * @return std::pair Decoded key/value + */ std::pair decodeKV(const std::string& data); +/** + * @brief Encode single value into a wal log + * + * @param type Log type + * @param val Log message, usually is a output of 'encodeKV' + * @return std::string Encoded wal + */ std::string encodeSingleValue(LogType type, folly::StringPiece val); + +/** + * @brief Decode single value from wal log + * + * @param encoded Encoded wal log + * @return folly::StringPiece Decoded value + */ folly::StringPiece decodeSingleValue(folly::StringPiece encoded); +/** + * @brief Encode multiple value into a wal log + * + * @param type Log type + * @param values Log messages + * @return std::string Encoded wal + */ std::string encodeMultiValues(LogType type, const std::vector& values); + +/** + * @brief Encode multiple key/value into a wal log 
+ * + * @param type Log type + * @param kvs Log messages + * @return std::string Encoded wal + */ std::string encodeMultiValues(LogType type, const std::vector& kvs); + +/** + * @brief Overload version of encodeMultiValues + */ std::string encodeMultiValues(LogType type, folly::StringPiece v1, folly::StringPiece v2); + +/** + * @brief Decode multiple values from encoded wal log + * + * @param encoded Encoded wal log + * @return std::vector Decoded values + */ std::vector decodeMultiValues(folly::StringPiece encoded); +/** + * @brief Encode a log batch + * + * @param batch + * @return std::string Encoded wal + */ std::string encodeBatchValue( const std::vector>& batch); +/** + * @brief Decode into log batchs + * + * @param encoded Encoded wal + * @return std::vector>> + * Log batch + */ std::vector>> decodeBatchValue(folly::StringPiece encoded); +/** + * @brief Encode a host into wal log + * + * @param type Log type + * @param learner Host address + * @return std::string Encoded wal + */ std::string encodeHost(LogType type, const HostAddr& learner); + +/** + * @brief Decode a host from wal log + * + * @param type Log type + * @param encoded Encoded wal + * @return HostAddr Decoded host address + */ HostAddr decodeHost(LogType type, const folly::StringPiece& encoded); -int64_t getTimestamp(const folly::StringPiece& command); +/** + * @brief Get the timestamp from wal + * + * @param log WAL log + * @return int64_t timestamp in utc + */ +int64_t getTimestamp(const folly::StringPiece& log); +/** + * @brief A wrapper class of batchs of log, support put/remove/removeRange + */ class BatchHolder : public nebula::cpp::NonCopyable, public nebula::cpp::NonMovable { public: BatchHolder() = default; ~BatchHolder() = default; + /** + * @brief Add a put operation to batch + * + * @param key Key to put + * @param val Value to put + */ void put(std::string&& key, std::string&& val) { size_ += key.size() + val.size(); auto op = std::make_tuple( @@ -66,12 +162,23 @@ class BatchHolder 
: public nebula::cpp::NonCopyable, public nebula::cpp::NonMova batch_.emplace_back(std::move(op)); } + /** + * @brief Add a remove operation to batch + * + * @param key Key to remove + */ void remove(std::string&& key) { size_ += key.size(); auto op = std::make_tuple(BatchLogType::OP_BATCH_REMOVE, std::forward(key), ""); batch_.emplace_back(std::move(op)); } + /** + * @brief Add a remove range operation to batch, [start, end) + * + * @param begin Start key to remove + * @param end End key to remove + */ void rangeRemove(std::string&& begin, std::string&& end) { size_ += begin.size() + end.size(); auto op = std::make_tuple(BatchLogType::OP_BATCH_REMOVE_RANGE, @@ -80,15 +187,25 @@ class BatchHolder : public nebula::cpp::NonCopyable, public nebula::cpp::NonMova batch_.emplace_back(std::move(op)); } + /** + * @brief reserve spaces for batch + */ void reserve(int32_t size) { batch_.reserve(size); } + /** + * @brief Get the batch object + * + * @return const std::vector>& + */ const std::vector>& getBatch() { return batch_; } - // size of the batch, in bytes + /** + * @brief Size of keys and values of the operations in the batch, in bytes + */ size_t size() { return size_; } diff --git a/src/kvstore/NebulaSnapshotManager.cpp b/src/kvstore/NebulaSnapshotManager.cpp index a59d7e8ee46..4bff03a0a40 100644 --- a/src/kvstore/NebulaSnapshotManager.cpp +++ b/src/kvstore/NebulaSnapshotManager.cpp @@ -79,8 +79,8 @@ bool NebulaSnapshotManager::accessTable(GraphSpaceID spaceId, std::unique_ptr iter; auto ret = store_->prefix(spaceId, partId, prefix, &iter, false, snapshot); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(INFO) << "[spaceId:" << spaceId << ", partId:" << partId << "] access prefix failed" - << ", error code:" << static_cast(ret); + VLOG(2) << "[spaceId:" << spaceId << ", partId:" << partId << "] access prefix failed" + << ", error code:" << static_cast(ret); cb(data, totalCount, totalSize, raftex::SnapshotStatus::FAILED); return false; } @@ -95,7 +95,7 @@ bool 
NebulaSnapshotManager::accessTable(GraphSpaceID spaceId, data.clear(); batchSize = 0; } else { - LOG(INFO) << "[spaceId:" << spaceId << ", partId:" << partId << "] send snapshot failed"; + VLOG(2) << "[spaceId:" << spaceId << ", partId:" << partId << "] send snapshot failed"; return false; } } diff --git a/src/kvstore/NebulaSnapshotManager.h b/src/kvstore/NebulaSnapshotManager.h index 9b9e27c86b7..1d1bced436c 100644 --- a/src/kvstore/NebulaSnapshotManager.h +++ b/src/kvstore/NebulaSnapshotManager.h @@ -20,11 +20,31 @@ class NebulaSnapshotManager : public raftex::SnapshotManager { public: explicit NebulaSnapshotManager(NebulaStore* kv); + /** + * @brief Scan all data and trigger callback to send to peer + * + * @param spaceId + * @param partId + * @param cb Callback when scan some amount of data + */ void accessAllRowsInSnapshot(GraphSpaceID spaceId, PartitionID partId, raftex::SnapshotCallback cb) override; private: + /** + * @brief Collect some data by prefix, and trigger callback when scan some amount of data + * + * @param spaceId + * @param partId + * @param prefix Prefix to scan + * @param cb Callback when scan some amount of data + * @param data Data container + * @param totalCount Data count + * @param totalSize Data size in bytes + * @param rateLimiter Rate limiter to restrict sending speed + * @return True if succeed. False if failed. 
+ */ bool accessTable(GraphSpaceID spaceId, PartitionID partId, const void* snapshot, diff --git a/src/kvstore/NebulaStore.cpp b/src/kvstore/NebulaStore.cpp index 18cab45c9ed..16fd5b53a42 100644 --- a/src/kvstore/NebulaStore.cpp +++ b/src/kvstore/NebulaStore.cpp @@ -92,7 +92,7 @@ void NebulaStore::loadPartFromDataPath() { try { spaceId = folly::to(dir); } catch (const std::exception& ex) { - LOG(ERROR) << "Data path invalid: " << ex.what(); + LOG(ERROR) << folly::sformat("Data path {} invalid {}", dir, ex.what()); continue; } @@ -303,7 +303,6 @@ void NebulaStore::addPart(GraphSpaceID spaceId, if (partIt != spaceIt->second->parts_.end()) { LOG(INFO) << "[Space: " << spaceId << ", Part: " << partId << "] has existed!"; if (!peers.empty()) { - LOG(INFO) << "[Space: " << spaceId << ", Part: " << partId << "] check peers..."; partIt->second->checkAndResetPeers(peers); } return; @@ -354,9 +353,10 @@ std::shared_ptr NebulaStore::newPart(GraphSpaceID spaceId, // pull the information from meta auto metaStatus = options_.partMan_->partMeta(spaceId, partId); if (!metaStatus.ok()) { - LOG(ERROR) << "options_.partMan_->partMeta(spaceId, partId); error: " - << metaStatus.status().toString() << " spaceId: " << spaceId - << ", partId: " << partId; + LOG(ERROR) << folly::sformat("Can't find space {} part {} from meta: {}", + spaceId, + partId, + metaStatus.status().toString()); return nullptr; } @@ -567,9 +567,10 @@ void NebulaStore::removeSpaceDir(const std::string& dir) { try { LOG(INFO) << "Try to remove space directory: " << dir; boost::filesystem::remove_all(dir); + LOG(INFO) << "Space directory removed: " << dir; } catch (const boost::filesystem::filesystem_error& e) { - LOG(ERROR) << "Exception caught while remove directory, please delete it by manual: " - << e.what(); + LOG(WARNING) << "Exception caught while remove directory, please delete it by manual: " + << e.what(); } } @@ -808,6 +809,7 @@ nebula::cpp2::ErrorCode NebulaStore::ingest(GraphSpaceID spaceId) { if 
(!ok(spaceRet)) { return error(spaceRet); } + LOG(INFO) << "Ingesting space " << spaceId; auto space = nebula::value(spaceRet); for (auto& engine : space->engines_) { auto parts = engine->allParts(); @@ -819,13 +821,13 @@ nebula::cpp2::ErrorCode NebulaStore::ingest(GraphSpaceID spaceId) { auto path = folly::stringPrintf("%s/download/%d", value(ret)->getDataRoot(), part); if (!fs::FileUtils::exist(path)) { - LOG(INFO) << path << " not existed"; + VLOG(1) << path << " not existed while ingesting"; continue; } auto files = nebula::fs::FileUtils::listAllFilesInDir(path.c_str(), true, "*.sst"); for (auto file : files) { - LOG(INFO) << "Ingesting extra file: " << file; + VLOG(1) << "Ingesting extra file: " << file; auto code = engine->ingest(std::vector({file})); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { return code; @@ -879,7 +881,7 @@ nebula::cpp2::ErrorCode NebulaStore::compact(GraphSpaceID spaceId) { auto code = nebula::cpp2::ErrorCode::SUCCEEDED; std::vector threads; - LOG(INFO) << "Space " << spaceId << " start compaction."; + LOG(INFO) << "Space " << spaceId << " start manual compaction."; for (auto& engine : space->engines_) { threads.emplace_back(std::thread([&engine, &code] { auto ret = engine->compact(); @@ -941,8 +943,8 @@ ErrorOr> NebulaStore: std::string path = folly::sformat("{}/checkpoints/{}", engine->getDataRoot(), name); if (!fs::FileUtils::exist(path)) { if (!fs::FileUtils::makeDir(path)) { - LOG(ERROR) << "Make checkpoint dir: " << path << " failed"; - return nebula::cpp2::ErrorCode::E_UNKNOWN; + LOG(WARNING) << "Make checkpoint dir: " << path << " failed"; + return nebula::cpp2::ErrorCode::E_FAILED_TO_CHECKPOINT; } } @@ -959,7 +961,8 @@ ErrorOr> NebulaStore: for (auto& partId : parts) { auto ret = this->part(spaceId, partId); if (!ok(ret)) { - LOG(ERROR) << "Part not found. 
space : " << spaceId << " Part : " << partId; + LOG(WARNING) << folly::sformat( + "space {} part {} not found while creating checkpoint", spaceId, partId); return error(ret); } @@ -981,7 +984,7 @@ ErrorOr> NebulaStore: auto result = nebula::fs::FileUtils::realPath(path.c_str()); if (!result.ok()) { - LOG(ERROR) << "Failed to get path:" << path << "'s real path"; + LOG(WARNING) << "Failed to get path:" << path << "'s real path"; return nebula::cpp2::ErrorCode::E_FAILED_TO_CHECKPOINT; } @@ -1006,13 +1009,13 @@ nebula::cpp2::ErrorCode NebulaStore::dropCheckpoint(GraphSpaceID spaceId, const * Drop checkpoint and wal together **/ auto checkpointPath = folly::sformat("{}/checkpoints/{}", engine->getDataRoot(), name); - LOG(INFO) << "Drop checkpoint : " << checkpointPath; + LOG(INFO) << "Drop checkpoint: " << checkpointPath; if (!fs::FileUtils::exist(checkpointPath)) { continue; } if (!fs::FileUtils::remove(checkpointPath.data(), true)) { - LOG(ERROR) << "Drop checkpoint dir failed : " << checkpointPath; + LOG(WARNING) << "Drop checkpoint dir failed : " << checkpointPath; return nebula::cpp2::ErrorCode::E_STORE_FAILURE; } } @@ -1022,7 +1025,7 @@ nebula::cpp2::ErrorCode NebulaStore::dropCheckpoint(GraphSpaceID spaceId, const nebula::cpp2::ErrorCode NebulaStore::setWriteBlocking(GraphSpaceID spaceId, bool sign) { auto spaceRet = space(spaceId); if (!ok(spaceRet)) { - LOG(ERROR) << "Get Space " << spaceId << " Failed"; + LOG(WARNING) << "Get Space " << spaceId << " Failed"; return error(spaceRet); } auto space = nebula::value(spaceRet); @@ -1031,7 +1034,7 @@ nebula::cpp2::ErrorCode NebulaStore::setWriteBlocking(GraphSpaceID spaceId, bool for (auto& part : parts) { auto partRet = this->part(spaceId, part); if (!ok(partRet)) { - LOG(ERROR) << "Part not found. space : " << spaceId << " Part : " << part; + LOG(WARNING) << "Part not found. 
space : " << spaceId << " Part : " << part; return error(partRet); } auto p = nebula::value(partRet); @@ -1175,8 +1178,8 @@ ErrorOr> NebulaStore::backupTa if (result != nebula::cpp2::ErrorCode::E_BACKUP_EMPTY_TABLE) { return result; } - LOG(WARNING) << "Since the table(" << tablePrefix - << ") is empty, the backup of the current table is skipped."; + LOG(INFO) << "Since the table(" << tablePrefix + << ") is empty, the backup of the current table is skipped."; continue; } backupPath.emplace_back(value(path)); @@ -1193,7 +1196,7 @@ nebula::cpp2::ErrorCode NebulaStore::restoreFromFiles(GraphSpaceID spaceId, const std::vector& files) { auto spaceRet = space(spaceId); if (!ok(spaceRet)) { - LOG(ERROR) << "Get Space " << spaceId << " Failed"; + LOG(WARNING) << "Get Space " << spaceId << " Failed"; return error(spaceRet); } auto space = nebula::value(spaceRet); @@ -1214,7 +1217,7 @@ nebula::cpp2::ErrorCode NebulaStore::multiPutWithoutReplicator(GraphSpaceID spac std::vector keyValues) { auto spaceRet = space(spaceId); if (!ok(spaceRet)) { - LOG(ERROR) << "Get Space " << spaceId << " Failed"; + LOG(WARNING) << "Get Space " << spaceId << " Failed"; return error(spaceRet); } auto space = nebula::value(spaceRet); @@ -1235,7 +1238,7 @@ ErrorOr NebulaStore::getProperty( GraphSpaceID spaceId, const std::string& property) { auto spaceRet = space(spaceId); if (!ok(spaceRet)) { - LOG(ERROR) << "Get Space " << spaceId << " Failed"; + LOG(WARNING) << "Get Space " << spaceId << " Failed"; return error(spaceRet); } auto space = nebula::value(spaceRet); diff --git a/src/kvstore/NebulaStore.h b/src/kvstore/NebulaStore.h index 985d6e52bd5..6d032d62e86 100644 --- a/src/kvstore/NebulaStore.h +++ b/src/kvstore/NebulaStore.h @@ -33,7 +33,6 @@ struct SpacePartInfo { ~SpacePartInfo() { parts_.clear(); engines_.clear(); - LOG(INFO) << "~SpacePartInfo()"; } std::unordered_map> parts_; @@ -44,6 +43,9 @@ struct SpaceListenerInfo { std::unordered_map listeners_; }; +/** + * @brief A derived class of 
KVStore, interfaces to manipulate data + */ class NebulaStore : public KVStore, public Handler { FRIEND_TEST(NebulaStoreTest, SimpleTest); FRIEND_TEST(NebulaStoreTest, PartsTest); @@ -55,6 +57,14 @@ class NebulaStore : public KVStore, public Handler { friend class ListenerBasicTest; public: + /** + * @brief Construct a new NebulaStore object + * + * @param options + * @param ioPool IOThreadPool + * @param serviceAddr Address of NebulaStore, used in raft + * @param workers Worker thread + */ NebulaStore(KVOptions options, std::shared_ptr ioPool, HostAddr serviceAddr, @@ -72,53 +82,105 @@ class NebulaStore : public KVStore, public Handler { ~NebulaStore(); - // Calculate the raft service address based on the storage service address + /** + * @brief Calculate the raft service address based on the storage service address + * + * @param srvcAddr Storage service address + * @return HostAddr Raft service address + */ static HostAddr getRaftAddr(const HostAddr& srvcAddr) { return Utils::getRaftAddrFromStoreAddr(srvcAddr); } + /** + * @brief Calculate the storage service address based on the raft service address + * + * @param raftAddr Raft service address + * @return HostAddr Storage service address + */ static HostAddr getStoreAddr(const HostAddr& raftAddr) { return Utils::getStoreAddrFromRaftAddr(raftAddr); } - // Pull meta information from the PartManager and initiate - // the current store instance + /** + * @brief Pull meta information from the PartManager and initiate the current store instance + * + * @return True if succeed. False if failed. 
+ */ bool init(); + /** + * @brief Stop the raft service and kv engine + */ void stop() override; + /** + * @brief Return bit-wise capability, not used + */ uint32_t capability() const override { return 0; } + /** + * @brief Return storage service address + */ HostAddr address() const { return storeSvcAddr_; } + /** + * @brief Get the IOThreadPool + */ std::shared_ptr getIoPool() const { return ioPool_; } + /** + * @brief Get the Background workers + */ std::shared_ptr getBgWorkers() const { return bgWorkers_; } + /** + * @brief Get the worker executors + */ std::shared_ptr getExecutors() const { return workers_; } - // Return the current leader + /** + * @brief Return the current leader + * + * @param spaceId + * @param partId + * @return ErrorOr Get the leader address of given partition if + * succeed, else return ErrorCode + */ ErrorOr partLeader(GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Return pointer of part manager + * + * @return PartManager* + */ PartManager* partManager() const override { return options_.partMan_.get(); } + /** + * @brief Return the NebulaStore is started as listener + */ bool isListener() const { return !options_.listenerPath_.empty(); } + /** + * @brief Get the data paths passed from configuration + * + * @return std::vector Data paths + */ std::vector getDataRoot() const override { return options_.dataPaths_; } @@ -134,6 +196,7 @@ class NebulaStore : public KVStore, public Handler { const void* GetSnapshot(GraphSpaceID spaceId, PartitionID partID, bool canReadFromFollower = false) override; + /** * @brief Release snapshot from engine. 
* @@ -143,12 +206,34 @@ class NebulaStore : public KVStore, public Handler { */ void ReleaseSnapshot(GraphSpaceID spaceId, PartitionID partId, const void* snapshot) override; + /** + * @brief Read a single key + * + * @param spaceId + * @param partId + * @param key + * @param value + * @param canReadFromFollower Whether check if current kvstore is leader of given partition + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode get(GraphSpaceID spaceId, PartitionID partId, const std::string& key, std::string* value, bool canReadFromFollower = false) override; + /** + * @brief Read a list of keys + * + * @param spaceId + * @param partId + * @param keys Keys to read + * @param values Pointers of value + * @param canReadFromFollower Whether check if current kvstore is leader of given partition + * @return Return std::vector when suceeded: Result status of each key, if key[i] does not + * exist, the i-th value in return value would be Status::KeyNotFound. Return ErrorCode when + * failed + */ std::pair> multiGet( GraphSpaceID spaceId, PartitionID partId, @@ -156,7 +241,17 @@ class NebulaStore : public KVStore, public Handler { std::vector* values, bool canReadFromFollower = false) override; - // Get all results in range [start, end) + /** + * @brief Get all results in range [start, end) + * + * @param spaceId + * @param partId + * @param start Start key, inclusive + * @param end End key, exclusive + * @param iter Iterator in range [start, end), returns by kv engine + * @param canReadFromFollower Whether check if current kvstore is leader of given partition + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode range(GraphSpaceID spaceId, PartitionID partId, const std::string& start, @@ -164,7 +259,9 @@ class NebulaStore : public KVStore, public Handler { std::unique_ptr* iter, bool canReadFromFollower = false) override; - // Delete the overloading with a rvalue `start' and `end' + /** + * @brief To forbid to pass rvalue via the 'range' parameter. 
+ */ nebula::cpp2::ErrorCode range(GraphSpaceID spaceId, PartitionID partId, std::string&& start, @@ -172,7 +269,16 @@ class NebulaStore : public KVStore, public Handler { std::unique_ptr* iter, bool canReadFromFollower = false) override = delete; - // Get all results with prefix. + /** + * @brief Get all results with 'prefix' str as prefix. + * + * @param spaceId + * @param partId + * @param prefix Key of prefix to seek + * @param iter Iterator of keys starts with 'prefix', returns by kv engine + * @param canReadFromFollower Whether check if current kvstore is leader of given partition + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode prefix(GraphSpaceID spaceId, PartitionID partId, const std::string& prefix, @@ -180,7 +286,9 @@ class NebulaStore : public KVStore, public Handler { bool canReadFromFollower = false, const void* snapshot = nullptr) override; - // Delete the overloading with a rvalue `prefix' + /** + * @brief To forbid to pass rvalue via the 'prefix' parameter. 
+ */ nebula::cpp2::ErrorCode prefix(GraphSpaceID spaceId, PartitionID partId, std::string&& prefix, @@ -188,7 +296,17 @@ class NebulaStore : public KVStore, public Handler { bool canReadFromFollower = false, const void* snapshot = nullptr) override = delete; - // Get all results with prefix starting from start + /** + * @brief Get all results with 'prefix' str as prefix starting form 'start' + * + * @param spaceId + * @param partId + * @param start Start key, inclusive + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' beginning from 'start', returns by kv engine + * @param canReadFromFollower Whether check if current kvstore is leader of given partition + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode rangeWithPrefix(GraphSpaceID spaceId, PartitionID partId, const std::string& start, @@ -196,7 +314,9 @@ class NebulaStore : public KVStore, public Handler { std::unique_ptr* iter, bool canReadFromFollower = false) override; - // Delete the overloading with a rvalue `prefix' + /** + * @brief To forbid to pass rvalue via the 'rangeWithPrefix' parameter. + */ nebula::cpp2::ErrorCode rangeWithPrefix(GraphSpaceID spaceId, PartitionID partId, std::string&& start, @@ -204,171 +324,500 @@ class NebulaStore : public KVStore, public Handler { std::unique_ptr* iter, bool canReadFromFollower = false) override = delete; + /** + * @brief Synchronize the kvstore across multiple replica by add a empty log + * + * @param spaceId + * @param partId + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode sync(GraphSpaceID spaceId, PartitionID partId) override; - // async batch put. 
+ /** + * @brief Write multiple key/values to kvstore asynchronously + * + * @param spaceId + * @param partId + * @param keyValues Key/values to put + * @param cb Callback when has a result + */ void asyncMultiPut(GraphSpaceID spaceId, PartitionID partId, std::vector&& keyValues, KVCallback cb) override; + /** + * @brief Remove a key from kvstore asynchronously + * + * @param spaceId + * @param partId + * @param key Key to remove + * @param cb Callback when has a result + */ void asyncRemove(GraphSpaceID spaceId, PartitionID partId, const std::string& key, KVCallback cb) override; + /** + * @brief Remove multiple keys from kvstore asynchronously + * + * @param spaceId + * @param partId + * @param keys Keys to remove + * @param cb Callback when has a result + */ void asyncMultiRemove(GraphSpaceID spaceId, PartitionID partId, std::vector&& keys, KVCallback cb) override; + /** + * @brief Remove keys in range [start, end) asynchronously + * + * @param spaceId + * @param partId + * @param start Start key + * @param end End key + * @param cb Callback when has a result + */ void asyncRemoveRange(GraphSpaceID spaceId, PartitionID partId, const std::string& start, const std::string& end, KVCallback cb) override; + /** + * @brief Async commit multi operation, difference between asyncMultiPut or asyncMultiRemove + * is this method allow contains both put and remove together, difference between asyncAtomicOp is + * that asyncAtomicOp may have CAS + * + * @param spaceId + * @param partId + * @param batch Encoded write batch + * @param cb Callback when has a result + */ void asyncAppendBatch(GraphSpaceID spaceId, PartitionID partId, std::string&& batch, KVCallback cb) override; + /** + * @brief Do some atomic operation on kvstore + * + * @param spaceId + * @param partId + * @param op Atomic operation + * @param cb Callback when has a result + */ void asyncAtomicOp(GraphSpaceID spaceId, PartitionID partId, raftex::AtomicOp op, KVCallback cb) override; + /** + * @brief Get the part 
object of given spaceId and partId + * + * @param spaceId + * @param partId + * @return ErrorOr> Return the part if succeed, + * else return ErrorCode + */ ErrorOr> part(GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Ingest the sst file under download directory + * + * @param spaceId + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode ingest(GraphSpaceID spaceId) override; + /** + * @brief Set space related rocksdb option + * + * @param spaceId + * @param configKey Config name + * @param configValue Config value + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode setOption(GraphSpaceID spaceId, const std::string& configKey, const std::string& configValue); + /** + * @brief Set space related rocksdb db option + * + * @param spaceId + * @param configKey DB Config name + * @param configValue Config value + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode setDBOption(GraphSpaceID spaceId, const std::string& configKey, const std::string& configValue); + /** + * @brief Trigger compaction, only used in rocksdb + * + * @param spaceId + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode compact(GraphSpaceID spaceId) override; + /** + * @brief Trigger flush, only used in rocksdb + * + * @param spaceId + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode flush(GraphSpaceID spaceId) override; + /** + * @brief Create a Checkpoint, only used in rocksdb + * + * @param spaceId + * @param name Checkpoint name + * @return ErrorOr> Return the + * checkpoint info if succeed, else return ErrorCode + */ ErrorOr> createCheckpoint( GraphSpaceID spaceId, const std::string& name) override; + /** + * @brief Trigger kv engine's backup, mainly for rocksdb PlainTable mounted on tmpfs/ramfs + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode backup(); + /** + * @brief Drop a Checkpoint, only used in rocksdb + * + * @param spaceId + * @param name Checkpoint name + * @return 
nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode dropCheckpoint(GraphSpaceID spaceId, const std::string& name) override; + /** + * @brief Set the write blocking flag, if blocked, only heartbeat can be replicated + * + * @param spaceId + * @param sign True to block. False to unblock + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode setWriteBlocking(GraphSpaceID spaceId, bool sign) override; + /** + * @brief Whether is leader of given partition or not + * + * @param spaceId + * @param partId + */ bool isLeader(GraphSpaceID spaceId, PartitionID partId); + /** + * @brief Try to retrieve the space part info of given spaceId + * + * @param spaceId + * @return ErrorOr> Return space part info + * when succeed, return ErrorCode when failed + */ ErrorOr> space(GraphSpaceID spaceId); + /** + * @brief Try to retrieve the space listener info of given spaceId + * + * @param spaceId + * @return ErrorOr> Return space + * listener info when succeed, return ErrorCode when failed + */ ErrorOr> spaceListener( GraphSpaceID spaceId); /** - * Implement four interfaces in Handler. 
- * */ + * @brief Add a space, called from part manager + * + * @param spaceId + * @param isListener Whether the space is listener + */ void addSpace(GraphSpaceID spaceId, bool isListener = false) override; + /** + * @brief Add a partition, called from part manager + * + * @param spaceId + * @param partId + * @param asLearner Whether start partition as learner + * @param peers Raft peers + */ void addPart(GraphSpaceID spaceId, PartitionID partId, bool asLearner, const std::vector& peers = {}) override; + /** + * @brief Remove a space, called from part manager + * + * @param spaceId + * @param isListener Whether the space is listener + */ void removeSpace(GraphSpaceID spaceId, bool isListener) override; + /** + * @brief Remove a partition, called from part manager + * + * @param spaceId + * @param partId + */ void removePart(GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Retrive the leader distribution + * + * @param leaderIds The leader address of all partitions + * @return int32_t The leader count of all spaces + */ int32_t allLeader( std::unordered_map>& leaderIds) override; + /** + * @brief Backup the data of a table prefix, for meta backup + * + * @param spaceId + * @param name + * @param tablePrefix Table prefix + * @param filter Data filter when iterate the table + * @return ErrorOr> Return the sst file path if + * succeed, else return ErrorCode + */ ErrorOr> backupTable( GraphSpaceID spaceId, const std::string& name, const std::string& tablePrefix, std::function filter) override; + /** + * @brief Restore from sst files + * + * @param spaceId + * @param files SST file path + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode restoreFromFiles(GraphSpaceID spaceId, const std::vector& files) override; + /** + * @brief Add a partition as listener + * + * @param spaceId + * @param partId + * @param type Listener type + * @param peers Raft peers of listener + */ void addListener(GraphSpaceID spaceId, PartitionID partId, 
meta::cpp2::ListenerType type, const std::vector& peers) override; + /** + * @brief Remove a listener partition + * + * @param spaceId + * @param partId + * @param type Listener type + */ void removeListener(GraphSpaceID spaceId, PartitionID partId, meta::cpp2::ListenerType type) override; + /** + * @brief Check if the partition's listener state has changed, add/remove if necessary + * + * @param spaceId + * @param partId + * @param remoteListeners The given partition's remote listener list + */ void checkRemoteListeners(GraphSpaceID spaceId, PartitionID partId, const std::vector& remoteListeners) override; + /** + * @brief Get all partitions grouped by data path and spaceId + * + * @param diskParts Get all space data path and all partition in the path + */ void fetchDiskParts(SpaceDiskPartsMap& diskParts) override; + /** + * @brief Write data to local storage engine only + * + * @param spaceId + * @param keyValues Key/values to write into only local storage engine instead of multiple replica + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode multiPutWithoutReplicator(GraphSpaceID spaceId, std::vector keyValues) override; + /** + * @brief Get the kvstore propery, only used in rocksdb + * + * @param spaceId + * @param property Property name + * @return ErrorOr Return the property value in string if + * succeed, else return ErrorCode + */ ErrorOr getProperty(GraphSpaceID spaceId, const std::string& property) override; + + /** + * @brief Register callback when found new partition is added + * + * @param funcName Modulename + * @param func Callback + * @param existParts All existing partitions + */ void registerOnNewPartAdded(const std::string& funcName, std::function&)> func, std::vector>& existParts); + /** + * @brief Unregister a module's callback + * + * @param funcName modulename + */ void unregisterOnNewPartAdded(const std::string& funcName) { onNewPartAdded_.erase(funcName); } + /** + * @brief Register callback to cleanup before a space is removed 
+ * + * @param func Callback to cleanup + */ void registerBeforeRemoveSpace(std::function func) { beforeRemoveSpace_ = func; } + /** + * @brief Unregister the callback to cleanup before a space is removed + * + */ void unregisterBeforeRemoveSpace() { beforeRemoveSpace_ = nullptr; } private: + /** + * @brief Load partitions by reading system part keys in kv engine + */ void loadPartFromDataPath(); + /** + * @brief Load partitions from meta + */ void loadPartFromPartManager(); + /** + * @brief Load and start listener of local address + */ void loadLocalListenerFromPartManager(); + /** + * @brief Load and add remote listener into current partition's raft group + */ void loadRemoteListenerFromPartManager(); + /** + * @brief Update space specific options + * + * @param spaceId + * @param options Options map + * @param isDbOption + */ void updateSpaceOption(GraphSpaceID spaceId, const std::unordered_map& options, bool isDbOption) override; + /** + * @brief Start a new kv engine on specified path + * + * @param spaceId + * @param dataPath + * @param walPath + * @return std::unique_ptr + */ std::unique_ptr newEngine(GraphSpaceID spaceId, const std::string& dataPath, const std::string& walPath); + /** + * @brief Start a new part + * + * @param spaceId + * @param partId + * @param engine Partition's related kv engine + * @param asLearner Whether start as raft learner + * @param defaultPeers The raft peer's address + * @return std::shared_ptr + */ std::shared_ptr newPart(GraphSpaceID spaceId, PartitionID partId, KVEngine* engine, bool asLearner, const std::vector& defaultPeers); + /** + * @brief Start a new listener part + * + * @param spaceId + * @param partId + * @param type Listener type + * @param peers The raft peer's address + * @return std::shared_ptr + */ std::shared_ptr newListener(GraphSpaceID spaceId, PartitionID partId, meta::cpp2::ListenerType type, const std::vector& peers); + /** + * @brief Get given partition's kv engine + * + * @param spaceId + * @param 
partId + * @return ErrorOr Return kv engine if succeed, return + * ErrorCode if failed + */ ErrorOr engine(GraphSpaceID spaceId, PartitionID partId); + /** + * @brief Check if the partition is leader of not + * + * @param part + * @param canReadFromFollower If set to true, will skip the check and return true + * @return True if we regard as leader + */ bool checkLeader(std::shared_ptr part, bool canReadFromFollower = false) const; + /** + * @brief clean useless wal + */ void cleanWAL(); + /** + * @brief Get the vertex id length of given space + * + * @param spaceId + * @return int32_t Vertex id length + */ int32_t getSpaceVidLen(GraphSpaceID spaceId); + /** + * @brief Remove a space's directory + */ void removeSpaceDir(const std::string& dir); private: diff --git a/src/kvstore/Part.cpp b/src/kvstore/Part.cpp index a5f1bd190ff..ddeaef02b65 100644 --- a/src/kvstore/Part.cpp +++ b/src/kvstore/Part.cpp @@ -50,7 +50,7 @@ std::pair Part::lastCommittedLogId() { std::string val; auto res = engine_->get(NebulaKeyUtils::systemCommitKey(partId_), &val); if (res != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(INFO) << idStr_ << "Cannot fetch the last committed log id from the storage engine"; + VLOG(2) << idStr_ << "Cannot fetch the last committed log id from the storage engine"; return std::make_pair(0, 0); } CHECK_EQ(val.size(), sizeof(LogID) + sizeof(TermID)); @@ -124,8 +124,8 @@ void Part::asyncAddLearner(const HostAddr& learner, KVCallback cb) { std::string log = encodeHost(OP_ADD_LEARNER, learner); sendCommandAsync(std::move(log)) .thenValue([callback = std::move(cb), learner, this](nebula::cpp2::ErrorCode code) mutable { - LOG(INFO) << idStr_ << "add learner " << learner - << ", result: " << apache::thrift::util::enumNameSafe(code); + VLOG(1) << idStr_ << "add learner " << learner + << ", result: " << apache::thrift::util::enumNameSafe(code); callback(code); }); } @@ -134,8 +134,8 @@ void Part::asyncTransferLeader(const HostAddr& target, KVCallback cb) { std::string log 
= encodeHost(OP_TRANS_LEADER, target); sendCommandAsync(std::move(log)) .thenValue([callback = std::move(cb), target, this](nebula::cpp2::ErrorCode code) mutable { - LOG(INFO) << idStr_ << "transfer leader to " << target - << ", result: " << apache::thrift::util::enumNameSafe(code); + VLOG(1) << idStr_ << "transfer leader to " << target + << ", result: " << apache::thrift::util::enumNameSafe(code); callback(code); }); } @@ -144,8 +144,8 @@ void Part::asyncAddPeer(const HostAddr& peer, KVCallback cb) { std::string log = encodeHost(OP_ADD_PEER, peer); sendCommandAsync(std::move(log)) .thenValue([callback = std::move(cb), peer, this](nebula::cpp2::ErrorCode code) mutable { - LOG(INFO) << idStr_ << "add peer " << peer - << ", result: " << apache::thrift::util::enumNameSafe(code); + VLOG(1) << idStr_ << "add peer " << peer + << ", result: " << apache::thrift::util::enumNameSafe(code); callback(code); }); } @@ -154,8 +154,8 @@ void Part::asyncRemovePeer(const HostAddr& peer, KVCallback cb) { std::string log = encodeHost(OP_REMOVE_PEER, peer); sendCommandAsync(std::move(log)) .thenValue([callback = std::move(cb), peer, this](nebula::cpp2::ErrorCode code) mutable { - LOG(INFO) << idStr_ << "remove peer " << peer - << ", result: " << apache::thrift::util::enumNameSafe(code); + VLOG(1) << idStr_ << "remove peer " << peer + << ", result: " << apache::thrift::util::enumNameSafe(code); callback(code); }); } @@ -165,7 +165,7 @@ void Part::setBlocking(bool sign) { } void Part::onLostLeadership(TermID term) { - VLOG(1) << "Lost the leadership for the term " << term; + VLOG(2) << "Lost the leadership for the term " << term; CallbackOptions opt; opt.spaceId = spaceId_; @@ -178,11 +178,11 @@ void Part::onLostLeadership(TermID term) { } void Part::onElected(TermID term) { - VLOG(1) << "Being elected as the leader for the term: " << term; + VLOG(2) << "Being elected as the leader for the term: " << term; } void Part::onLeaderReady(TermID term) { - VLOG(1) << "leader ready to server for 
the term: " << term; + VLOG(2) << "leader ready to server for the term: " << term; CallbackOptions opt; opt.spaceId = spaceId_; @@ -203,7 +203,7 @@ void Part::registerOnLeaderLost(LeaderChangeCB cb) { } void Part::onDiscoverNewLeader(HostAddr nLeader) { - LOG(INFO) << idStr_ << "Find the new leader " << nLeader; + VLOG(2) << idStr_ << "Find the new leader " << nLeader; if (newLeaderCb_) { newLeaderCb_(nLeader); } @@ -220,7 +220,7 @@ std::tuple Part::commitLogs( lastTerm = iter->logTerm(); auto log = iter->logMsg(); if (log.empty()) { - VLOG(3) << idStr_ << "Skip the heartbeat!"; + VLOG(4) << idStr_ << "Skip the heartbeat!"; ++(*iter); continue; } @@ -232,7 +232,7 @@ std::tuple Part::commitLogs( DCHECK_EQ(2, pieces.size()); auto code = batch->put(pieces[0], pieces[1]); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Failed to call WriteBatch::put()"; + VLOG(3) << idStr_ << "Failed to call WriteBatch::put()"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } break; @@ -242,11 +242,11 @@ std::tuple Part::commitLogs( // Make the number of values are an even number DCHECK_EQ((kvs.size() + 1) / 2, kvs.size() / 2); for (size_t i = 0; i < kvs.size(); i += 2) { - VLOG(2) << "OP_MULTI_PUT " << folly::hexlify(kvs[i]) + VLOG(4) << "OP_MULTI_PUT " << folly::hexlify(kvs[i]) << ", val = " << folly::hexlify(kvs[i + 1]); auto code = batch->put(kvs[i], kvs[i + 1]); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Failed to call WriteBatch::put()"; + VLOG(3) << idStr_ << "Failed to call WriteBatch::put()"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } } @@ -256,7 +256,7 @@ std::tuple Part::commitLogs( auto key = decodeSingleValue(log); auto code = batch->remove(key); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Failed to call WriteBatch::remove()"; + VLOG(3) << idStr_ << "Failed to call WriteBatch::remove()"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } break; @@ -266,7 +266,7 @@ 
std::tuple Part::commitLogs( for (auto k : keys) { auto code = batch->remove(k); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Failed to call WriteBatch::remove()"; + VLOG(3) << idStr_ << "Failed to call WriteBatch::remove()"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } } @@ -277,7 +277,7 @@ std::tuple Part::commitLogs( DCHECK_EQ(2, range.size()); auto code = batch->removeRange(range[0], range[1]); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Failed to call WriteBatch::removeRange()"; + VLOG(3) << idStr_ << "Failed to call WriteBatch::removeRange()"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } break; @@ -285,8 +285,8 @@ std::tuple Part::commitLogs( case OP_BATCH_WRITE: { auto data = decodeBatchValue(log); for (auto& op : data) { - VLOG(2) << "OP_BATCH_WRITE: " << folly::hexlify(op.second.first) - << ", val=" << folly::hexlify(op.second.second); + VLOG(4) << "OP_BATCH_WRITE: " << folly::hexlify(op.second.first) + << ", val = " << folly::hexlify(op.second.second); auto code = nebula::cpp2::ErrorCode::SUCCEEDED; if (op.first == BatchLogType::OP_BATCH_PUT) { code = batch->put(op.second.first, op.second.second); @@ -296,7 +296,7 @@ std::tuple Part::commitLogs( code = batch->removeRange(op.second.first, op.second.second); } if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Failed to call WriteBatch"; + VLOG(3) << idStr_ << "Failed to call WriteBatch"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } } @@ -312,9 +312,9 @@ std::tuple Part::commitLogs( if (ts > startTimeMs_) { commitTransLeader(newLeader); } else { - LOG(INFO) << idStr_ << "Skip commit stale transfer leader " << newLeader - << ", the part is opened at " << startTimeMs_ << ", but the log timestamp is " - << ts; + VLOG(1) << idStr_ << "Skip commit stale transfer leader " << newLeader + << ", the part is opened at " << startTimeMs_ << ", but the log timestamp is " + << ts; } break; } @@ -324,14 +324,15 @@ 
std::tuple Part::commitLogs( if (ts > startTimeMs_) { commitRemovePeer(peer); } else { - LOG(INFO) << idStr_ << "Skip commit stale remove peer " << peer - << ", the part is opened at " << startTimeMs_ << ", but the log timestamp is " - << ts; + VLOG(1) << idStr_ << "Skip commit stale remove peer " << peer + << ", the part is opened at " << startTimeMs_ << ", but the log timestamp is " + << ts; } break; } default: { - LOG(WARNING) << idStr_ << "Unknown operation: " << static_cast(log[0]); + VLOG(3) << idStr_ + << "Should not reach here. Unknown operation: " << static_cast(log[0]); } } @@ -341,7 +342,7 @@ std::tuple Part::commitLogs( if (lastId >= 0) { auto code = putCommitMsg(batch.get(), lastId, lastTerm); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Commit msg failed"; + VLOG(3) << idStr_ << "Put commit id into batch failed"; return {code, kNoCommitLogId, kNoCommitLogTerm}; } } @@ -368,14 +369,14 @@ std::pair Part::commitSnapshot(const std::vector& size += row.size(); auto kv = decodeKV(row); if (nebula::cpp2::ErrorCode::SUCCEEDED != batch->put(kv.first, kv.second)) { - LOG(ERROR) << idStr_ << "Put failed in commit"; + VLOG(3) << idStr_ << "Failed to call WriteBatch::put()"; return std::make_pair(0, 0); } } if (finished) { auto retCode = putCommitMsg(batch.get(), committedLogId, committedLogTerm); if (nebula::cpp2::ErrorCode::SUCCEEDED != retCode) { - LOG(ERROR) << idStr_ << "Put failed in commit"; + VLOG(3) << idStr_ << "Put commit id into batch failed"; return std::make_pair(0, 0); } } @@ -383,7 +384,6 @@ std::pair Part::commitSnapshot(const std::vector& auto code = engine_->commitBatchWrite( std::move(batch), FLAGS_rocksdb_disable_wal, FLAGS_rocksdb_wal_sync, true); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Put failed in commit"; return std::make_pair(0, 0); } return std::make_pair(count, size); @@ -400,18 +400,18 @@ nebula::cpp2::ErrorCode Part::putCommitMsg(WriteBatch* batch, } bool 
Part::preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const std::string& log) { - VLOG(3) << idStr_ << "logId " << logId << ", termId " << termId << ", clusterId " << clusterId; + VLOG(4) << idStr_ << "logId " << logId << ", termId " << termId << ", clusterId " << clusterId; if (!log.empty()) { switch (log[sizeof(int64_t)]) { case OP_ADD_LEARNER: { auto learner = decodeHost(OP_ADD_LEARNER, log); auto ts = getTimestamp(log); if (ts > startTimeMs_) { - LOG(INFO) << idStr_ << "preprocess add learner " << learner; + VLOG(1) << idStr_ << "preprocess add learner " << learner; addLearner(learner); } else { - LOG(INFO) << idStr_ << "Skip stale add learner " << learner << ", the part is opened at " - << startTimeMs_ << ", but the log timestamp is " << ts; + VLOG(1) << idStr_ << "Skip stale add learner " << learner << ", the part is opened at " + << startTimeMs_ << ", but the log timestamp is " << ts; } break; } @@ -419,12 +419,12 @@ bool Part::preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const auto newLeader = decodeHost(OP_TRANS_LEADER, log); auto ts = getTimestamp(log); if (ts > startTimeMs_) { - LOG(INFO) << idStr_ << "preprocess trans leader " << newLeader; + VLOG(1) << idStr_ << "preprocess trans leader " << newLeader; preProcessTransLeader(newLeader); } else { - LOG(INFO) << idStr_ << "Skip stale transfer leader " << newLeader - << ", the part is opened at " << startTimeMs_ << ", but the log timestamp is " - << ts; + VLOG(1) << idStr_ << "Skip stale transfer leader " << newLeader + << ", the part is opened at " << startTimeMs_ << ", but the log timestamp is " + << ts; } break; } @@ -432,11 +432,11 @@ bool Part::preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const auto peer = decodeHost(OP_ADD_PEER, log); auto ts = getTimestamp(log); if (ts > startTimeMs_) { - LOG(INFO) << idStr_ << "preprocess add peer " << peer; + VLOG(1) << idStr_ << "preprocess add peer " << peer; addPeer(peer); } else { - LOG(INFO) << idStr_ << 
"Skip stale add peer " << peer << ", the part is opened at " - << startTimeMs_ << ", but the log timestamp is " << ts; + VLOG(1) << idStr_ << "Skip stale add peer " << peer << ", the part is opened at " + << startTimeMs_ << ", but the log timestamp is " << ts; } break; } @@ -444,11 +444,11 @@ bool Part::preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const auto peer = decodeHost(OP_REMOVE_PEER, log); auto ts = getTimestamp(log); if (ts > startTimeMs_) { - LOG(INFO) << idStr_ << "preprocess remove peer " << peer; + VLOG(1) << idStr_ << "preprocess remove peer " << peer; preProcessRemovePeer(peer); } else { - LOG(INFO) << idStr_ << "Skip stale remove peer " << peer << ", the part is opened at " - << startTimeMs_ << ", but the log timestamp is " << ts; + VLOG(1) << idStr_ << "Skip stale remove peer " << peer << ", the part is opened at " + << startTimeMs_ << ", but the log timestamp is " << ts; } break; } @@ -464,12 +464,12 @@ nebula::cpp2::ErrorCode Part::cleanup() { LOG(INFO) << idStr_ << "Clean rocksdb part data"; auto batch = engine_->startBatchWrite(); // Remove the vertex, edge, index, systemCommitKey, operation data under the part - const auto& vertexPre = NebulaKeyUtils::tagPrefix(partId_); - auto ret = batch->removeRange(NebulaKeyUtils::firstKey(vertexPre, vIdLen_), - NebulaKeyUtils::lastKey(vertexPre, vIdLen_)); + const auto& tagPre = NebulaKeyUtils::tagPrefix(partId_); + auto ret = batch->removeRange(NebulaKeyUtils::firstKey(tagPre, vIdLen_), + NebulaKeyUtils::lastKey(tagPre, vIdLen_)); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Remove the part vertex data failed, error " - << static_cast(ret); + VLOG(3) << idStr_ << "Failed to encode removeRange() when cleanup tag, error " + << apache::thrift::util::enumNameSafe(ret); return ret; } @@ -477,7 +477,8 @@ nebula::cpp2::ErrorCode Part::cleanup() { ret = batch->removeRange(NebulaKeyUtils::firstKey(edgePre, vIdLen_), NebulaKeyUtils::lastKey(edgePre, vIdLen_)); if 
(ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Remove the part edge data failed, error" << static_cast(ret); + VLOG(3) << idStr_ << "Failed to encode removeRange() when cleanup edge, error " + << apache::thrift::util::enumNameSafe(ret); return ret; } @@ -485,8 +486,8 @@ nebula::cpp2::ErrorCode Part::cleanup() { ret = batch->removeRange(NebulaKeyUtils::firstKey(indexPre, sizeof(IndexID)), NebulaKeyUtils::lastKey(indexPre, sizeof(IndexID))); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Remove the part index data failed, error " - << static_cast(ret); + VLOG(3) << idStr_ << "Failed to encode removeRange() when cleanup index, error " + << apache::thrift::util::enumNameSafe(ret); return ret; } @@ -494,15 +495,26 @@ nebula::cpp2::ErrorCode Part::cleanup() { ret = batch->removeRange(NebulaKeyUtils::firstKey(operationPre, sizeof(int64_t)), NebulaKeyUtils::lastKey(operationPre, sizeof(int64_t))); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Remove the part operation data failed, error " - << static_cast(ret); + VLOG(3) << idStr_ << "Failed to encode removeRange() when cleanup operation, error " + << apache::thrift::util::enumNameSafe(ret); return ret; } + const auto& vertexPre = NebulaKeyUtils::vertexPrefix(partId_); + ret = batch->removeRange(NebulaKeyUtils::firstKey(vertexPre, vIdLen_), + NebulaKeyUtils::lastKey(vertexPre, vIdLen_)); + if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { + VLOG(3) << idStr_ << "Failed to encode removeRange() when cleanup operation, error " + << apache::thrift::util::enumNameSafe(ret); + return ret; + } + + // todo(doodle): toss prime and double prime + ret = batch->remove(NebulaKeyUtils::systemCommitKey(partId_)); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(ERROR) << idStr_ << "Remove the part system commit data failed, error " - << static_cast(ret); + VLOG(3) << idStr_ << "Remove the part system commit data failed, error " + << 
apache::thrift::util::enumNameSafe(ret); return ret; } return engine_->commitBatchWrite( diff --git a/src/kvstore/Part.h b/src/kvstore/Part.h index 94b2f676a08..cc3d476e600 100644 --- a/src/kvstore/Part.h +++ b/src/kvstore/Part.h @@ -19,10 +19,29 @@ namespace kvstore { using RaftClient = thrift::ThriftClientManager; +/** + * @brief A derived class of RaftPart, most of the interfaces are called from NebulaStore + */ class Part : public raftex::RaftPart { friend class SnapshotManager; public: + /** + * @brief Construct a new Part object + * + * @param spaceId + * @param partId + * @param localAddr Local address of the Part + * @param walPath Listener's wal path + * @param engine Pointer of kv engine + * @param pool IOThreadPool for listener + * @param workers Background thread for listener + * @param handlers Worker thread for listener + * @param snapshotMan Snapshot manager + * @param clientMan Client manager + * @param diskMan Disk manager + * @param vIdLen Vertex id length of space + */ Part(GraphSpaceID spaceId, PartitionID partId, HostAddr localAddr, @@ -40,43 +59,138 @@ class Part : public raftex::RaftPart { LOG(INFO) << idStr_ << "~Part()"; } + /** + * @brief Return the related kv engine + */ KVEngine* engine() { return engine_; } + /** + * @brief Write single key/values to kvstore asynchronously + * + * @param key Key to put + * @param value Value to put + * @param cb Callback when has a result + */ void asyncPut(folly::StringPiece key, folly::StringPiece value, KVCallback cb); + + /** + * @brief Write multiple key/values to kvstore asynchronously + * + * @param keyValues Key/values to put + * @param cb Callback when has a result + */ void asyncMultiPut(const std::vector& keyValues, KVCallback cb); + /** + * @brief Remove a key from kvstore asynchronously + * + * @param key Key to remove + * @param cb Callback when has a result + */ void asyncRemove(folly::StringPiece key, KVCallback cb); + + /** + * @brief Remove multiple keys from kvstore asynchronously + *
+ * @param key Keys to remove + * @param cb Callback when has a result + */ void asyncMultiRemove(const std::vector& keys, KVCallback cb); + + /** + * @brief Remove keys in range [start, end) asynchronously + * + * @param start Start key + * @param end End key + * @param cb Callback when has a result + */ void asyncRemoveRange(folly::StringPiece start, folly::StringPiece end, KVCallback cb); + /** + * @brief Async commit multi operation, difference between asyncMultiPut or asyncMultiRemove + * is this method allow contains both put and remove together, difference between asyncAtomicOp is + * that asyncAtomicOp may have CAS + * + * @param batch Encoded write batch + * @param cb Callback when has a result + */ void asyncAppendBatch(std::string&& batch, KVCallback cb); + /** + * @brief Do some atomic operation on kvstore + * + * @param op Atomic operation + * @param cb Callback when has a result + */ void asyncAtomicOp(raftex::AtomicOp op, KVCallback cb); + /** + * @brief Add a raft learner asynchronously by adding raft log + * + * @param learner Address of learner + * @param cb Callback when has a result + */ void asyncAddLearner(const HostAddr& learner, KVCallback cb); + /** + * @brief Try to transfer raft leader to target host asynchronously, by adding raft log + * + * @param target Address of target new leader + * @param cb Callback when has a result + */ void asyncTransferLeader(const HostAddr& target, KVCallback cb); + /** + * @brief Add a raft peer asynchronously by adding raft log + * + * @param peer Address of new peer + * @param cb Callback when has a result + */ void asyncAddPeer(const HostAddr& peer, KVCallback cb); + /** + * @brief Remove a raft peer asynchronously by adding raft log + * + * @param peer Address of peer to be removed + * @param cb Callback when has a result + */ void asyncRemovePeer(const HostAddr& peer, KVCallback cb); + /** + * @brief Set the write blocking flag, if blocked, only heartbeat can be replicated + * + * @param sign True to 
block. False to unblock + */ void setBlocking(bool sign); - // Sync the information committed on follower. + /** + * @brief Synchronize the kvstore across multiple replica by adding an empty log + * + * @param cb Callback when has a result + */ void sync(KVCallback cb); + /** + * @brief Register a callback when discover a new leader + * + * @param cb Callback when discovered a new leader + */ void registerNewLeaderCb(NewLeaderCallback cb) { newLeaderCb_ = std::move(cb); } + /** + * @brief Unregister the new leader callback + */ void unRegisterNewLeaderCb() { newLeaderCb_ = nullptr; } - // clean up all data about this part. + /** + * @brief Clean up all data about this part. + */ void resetPart() { std::lock_guard g(raftLock_); reset(); @@ -86,33 +200,101 @@ class Part : public raftex::RaftPart { /** * Methods inherited from RaftPart */ + + /** + * @brief Read last commit log id and term from external storage, used in initialization + * + * @return std::pair Last commit log id and last commit log term + */ std::pair lastCommittedLogId() override; + /** + * @brief Callback when a raft node lost leadership on term + * + * @param term + */ void onLostLeadership(TermID term) override; + /** + * @brief Callback when a raft node elected as leader on term + * + * @param term + */ void onElected(TermID term) override; + /** + * @brief Callback when a raft node is ready to serve as leader + * + * @param term + */ void onLeaderReady(TermID term) override; + /** + * @brief Callback when a raft node discover new leader + * + * @param nLeader New leader's address + */ void onDiscoverNewLeader(HostAddr nLeader) override; + /** + * @brief Apply the logs in iterator to state machine, and return the commit log id and commit log + * term if succeed + * + * @param iter Wal log iterator + * @param wait Whether we should wait until all data applied to state machine + * @return std::tuple + * + */ std::tuple commitLogs(std::unique_ptr iter, bool wait) override; + /** + * @brief Some special log
need to be pre-processed when appending to wal + * + * @param logId Log id to pre-process + * @param termId Log term to pre-process + * @param clusterId Cluster id in wal + * @param log Log message in wal + * @return True if succeed. False if failed. + */ bool preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const std::string& log) override; + /** + * @brief If a raft peer falls too far behind the leader, the leader will send all its data + * in snapshot by batch, Part need to implement this method to apply the batch to state machine. + * The return value is a pair of (count, size) of this batch. + * + * @param data Data to apply + * @param committedLogId Commit log id of snapshot + * @param committedLogTerm Commit log term of snapshot + * @param finished Whether snapshot is finished + * @return std::pair Return count and size of the data + */ std::pair commitSnapshot(const std::vector& data, LogID committedLogId, TermID committedLogTerm, bool finished) override; + /** + * @brief Encode the commit log id and commit log term to write batch + * + * @param batch Pointer of write batch + * @param committedLogId Commit log id + * @param committedLogTerm Commit log term + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode putCommitMsg(WriteBatch* batch, LogID committedLogId, TermID committedLogTerm); + /** + * @brief clean up data in listener, called in RaftPart::reset + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode cleanup() override; public: @@ -123,8 +305,15 @@ class Part : public raftex::RaftPart { }; using LeaderChangeCB = std::function; + + /** + * @brief Register callback when raft node is ready to serve as leader + */ void registerOnLeaderReady(LeaderChangeCB cb); + /** + * @brief Register callback when raft node lost leadership + */ void registerOnLeaderLost(LeaderChangeCB cb); protected: diff --git a/src/kvstore/PartManager.cpp b/src/kvstore/PartManager.cpp index 75046050b1f..64ccdab9f72 100644 ---
a/src/kvstore/PartManager.cpp +++ b/src/kvstore/PartManager.cpp @@ -59,7 +59,6 @@ MetaServerBasedPartManager::MetaServerBasedPartManager(HostAddr host, meta::Meta } MetaServerBasedPartManager::~MetaServerBasedPartManager() { - VLOG(3) << "~MetaServerBasedPartManager"; if (nullptr != client_) { client_->unRegisterListener(); client_ = nullptr; @@ -88,16 +87,12 @@ Status MetaServerBasedPartManager::spaceExist(const HostAddr& host, GraphSpaceID void MetaServerBasedPartManager::onSpaceAdded(GraphSpaceID spaceId, bool isListener) { if (handler_ != nullptr) { handler_->addSpace(spaceId, isListener); - } else { - VLOG(1) << "handler_ is nullptr!"; } } void MetaServerBasedPartManager::onSpaceRemoved(GraphSpaceID spaceId, bool isListener) { if (handler_ != nullptr) { handler_->removeSpace(spaceId, isListener); - } else { - VLOG(1) << "handler_ is nullptr!"; } } @@ -160,24 +155,18 @@ void MetaServerBasedPartManager::onSpaceOptionUpdated( if (!dbOpt.empty()) { handler_->updateSpaceOption(spaceId, dbOpt, true); } - } else { - VLOG(1) << "handler_ is nullptr!"; } } void MetaServerBasedPartManager::onPartAdded(const meta::PartHosts& partMeta) { if (handler_ != nullptr) { handler_->addPart(partMeta.spaceId_, partMeta.partId_, false, {}); - } else { - VLOG(1) << "handler_ is nullptr!"; } } void MetaServerBasedPartManager::onPartRemoved(GraphSpaceID spaceId, PartitionID partId) { if (handler_ != nullptr) { handler_->removePart(spaceId, partId); - } else { - VLOG(1) << "handler_ is nullptr!"; } } @@ -189,16 +178,12 @@ void MetaServerBasedPartManager::fetchLeaderInfo( std::unordered_map>& leaderIds) { if (handler_ != nullptr) { handler_->allLeader(leaderIds); - } else { - VLOG(1) << "handler_ is nullptr!"; } } void MetaServerBasedPartManager::fetchDiskParts(SpaceDiskPartsMap& diskParts) { if (handler_ != nullptr) { handler_->fetchDiskParts(diskParts); - } else { - VLOG(1) << "handler_ is nullptr!"; } } @@ -220,8 +205,6 @@ void MetaServerBasedPartManager::onListenerAdded(GraphSpaceID 
spaceId, const meta::ListenerHosts& listenerHost) { if (handler_ != nullptr) { handler_->addListener(spaceId, partId, listenerHost.type_, listenerHost.peers_); - } else { - VLOG(1) << "handler_ is nullptr!"; } } @@ -230,8 +213,6 @@ void MetaServerBasedPartManager::onListenerRemoved(GraphSpaceID spaceId, meta::cpp2::ListenerType type) { if (handler_ != nullptr) { handler_->removeListener(spaceId, partId, type); - } else { - VLOG(1) << "handler_ is nullptr!"; } } @@ -239,8 +220,6 @@ void MetaServerBasedPartManager::onCheckRemoteListeners( GraphSpaceID spaceId, PartitionID partId, const std::vector& remoteListeners) { if (handler_ != nullptr) { handler_->checkRemoteListeners(spaceId, partId, remoteListeners); - } else { - VLOG(1) << "handler_ is nullptr!"; } } diff --git a/src/kvstore/PartManager.h b/src/kvstore/PartManager.h index 0f7b8b80c1c..0e2527f89fb 100644 --- a/src/kvstore/PartManager.h +++ b/src/kvstore/PartManager.h @@ -16,48 +16,116 @@ namespace nebula { namespace kvstore { +/** + * @brief Handler when found space/part info changed, called from part manager + */ class Handler { public: virtual ~Handler() = default; + /** + * @brief Add a space + * + * @param spaceId + * @param isListener Whether the space is listener + */ virtual void addSpace(GraphSpaceID spaceId, bool isListener = false) = 0; + /** + * @brief Add a partition + * + * @param spaceId + * @param partId + * @param asLearner Whether start partition as learner + * @param peers Raft peers + */ virtual void addPart(GraphSpaceID spaceId, PartitionID partId, bool asLearner, const std::vector& peers) = 0; + /** + * @brief Update space specific options + * + * @param spaceId + * @param options Options map + * @param isDbOption + */ virtual void updateSpaceOption(GraphSpaceID spaceId, const std::unordered_map& options, bool isDbOption) = 0; + /** + * @brief Remove a space + * + * @param spaceId + * @param isListener Whether the space is listener + */ virtual void removeSpace(GraphSpaceID spaceId, bool 
isListener = false) = 0; + /** + * @brief Remove a partition + * + * @param spaceId + * @param partId + */ virtual void removePart(GraphSpaceID spaceId, PartitionID partId) = 0; + /** + * @brief Add a partition as listener + * + * @param spaceId + * @param partId + * @param type Listener type + * @param peers Raft peers of listener + */ virtual void addListener(GraphSpaceID spaceId, PartitionID partId, meta::cpp2::ListenerType type, const std::vector& peers) = 0; + /** + * @brief Remove a listener partition + * + * @param spaceId + * @param partId + * @param type Listener type + */ virtual void removeListener(GraphSpaceID spaceId, PartitionID partId, meta::cpp2::ListenerType type) = 0; + /** + * @brief Check if the partition's listener state has changed, add/remove if necessary + * + * @param spaceId + * @param partId + * @param remoteListeners The given partition's remote listener list + */ virtual void checkRemoteListeners(GraphSpaceID spaceId, PartitionID partId, const std::vector& remoteListeners) = 0; - // get infos from handler(nebula store) to listener(meta_client -> meta) + /** + * @brief Retrieve the leader distribution + * + * @param leaderIds The leader address of all partitions + * @return int32_t The leader count of all spaces + */ virtual int32_t allLeader( std::unordered_map>& leaderIds) = 0; + /** + * @brief Get all partitions grouped by data path and spaceId + * + * @param diskParts Get all space data path and all partition in the path + */ virtual void fetchDiskParts(SpaceDiskPartsMap& diskParts) = 0; }; /** - * This class manages all meta information one storage host needed. - * */ + * @brief This class manages all meta information one storage host needed.
+ */ class PartManager { public: PartManager() = default; @@ -65,33 +133,65 @@ class PartManager { virtual ~PartManager() = default; /** - * return meta::PartsMap for host - * */ + * @brief return part allocation for a host + * + * @param host + * @return meta::PartsMap Data part allocation of all spaces on the host + */ virtual meta::PartsMap parts(const HostAddr& host) = 0; /** - * return meta::PartHosts for - * */ + * @brief Return all peers of a given partition + * + * @param spaceId + * @param partId + * @return StatusOr Return peers of a partition if succeeded, else return an error + * status + */ virtual StatusOr partMeta(GraphSpaceID spaceId, PartitionID partId) = 0; /** - * Check current part exist or not on host. - * */ + * @brief Check current part exist or not on host. + * + * @param host + * @param spaceId + * @param partId + * @return Status + */ virtual Status partExist(const HostAddr& host, GraphSpaceID spaceId, PartitionID partId) = 0; /** - * Check current space exist or not. - * */ + * @brief Check host has the space exist or not. + * + * @param host + * @param spaceId + * @return Status + */ virtual Status spaceExist(const HostAddr& host, GraphSpaceID spaceId) = 0; + /** + * @brief Return the listener allocation of a host + * + * @param host + * @return meta::ListenersMap Listener allocation of all spaces on the host + */ virtual meta::ListenersMap listeners(const HostAddr& host) = 0; + /** + * @brief Return remote listener info if given partition has any listener + * + * @param spaceId + * @param partId + * @return StatusOr> Remote listener information + */ virtual StatusOr> listenerPeerExist(GraphSpaceID spaceId, PartitionID partId) = 0; /** - * Register Handler - * */ + * @brief Register a handler to part manager, e.g. NebulaStore + * + * @param handler + */ void registerHandler(Handler* handler) { handler_ = handler; } @@ -101,8 +201,8 @@ class PartManager { }; /** -: * Memory based PartManager, it is used in UTs now.
- * */ + * @brief Memory based PartManager, it is used in UTs now. + */ class MemPartManager final : public PartManager { FRIEND_TEST(NebulaStoreTest, SimpleTest); FRIEND_TEST(NebulaStoreTest, PartsTest); @@ -121,10 +221,31 @@ class MemPartManager final : public PartManager { ~MemPartManager() = default; + /** + * @brief return part allocation for a host + * + * @param host + * @return meta::PartsMap Data part allocation of all spaces on the host + */ meta::PartsMap parts(const HostAddr& host) override; + /** + * @brief Return all peers of a given partition + * + * @param spaceId + * @param partId + * @return StatusOr Return peers of a partition if succeeded, else return a error + * status + */ StatusOr partMeta(GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Add a partition with its peers + * + * @param spaceId + * @param partId + * @param peers + */ void addPart(GraphSpaceID spaceId, PartitionID partId, std::vector peers = {}) { bool noSpace = partsMap_.find(spaceId) == partsMap_.end(); auto& p = partsMap_[spaceId]; @@ -142,6 +263,12 @@ class MemPartManager final : public PartManager { } } + /** + * @brief Remove a partition from part manager + * + * @param spaceId + * @param partId + */ void removePart(GraphSpaceID spaceId, PartitionID partId) { auto it = partsMap_.find(spaceId); CHECK(it != partsMap_.end()); @@ -156,8 +283,23 @@ class MemPartManager final : public PartManager { } } + /** + * @brief Check current part exist or not on host. + * + * @param host + * @param spaceId + * @param partId + * @return Status + */ Status partExist(const HostAddr& host, GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Check host has the space exist or not. 
+ * + * @param host + * @param spaceId + * @return Status + */ Status spaceExist(const HostAddr&, GraphSpaceID spaceId) override { if (partsMap_.find(spaceId) != partsMap_.end()) { return Status::OK(); @@ -166,12 +308,30 @@ class MemPartManager final : public PartManager { } } + /** + * @brief Return the part allocation + * + * @return meta::PartsMap& + */ meta::PartsMap& partsMap() { return partsMap_; } + /** + * @brief Return the listener allocation of a host + * + * @param host + * @return meta::ListenersMap Listener allocation of all spaces on the host + */ meta::ListenersMap listeners(const HostAddr& host) override; + /** + * @brief Return remote listener info if given partition has any listener + * + * @param spaceId + * @param partId + * @return StatusOr> Remote listener information + */ StatusOr> listenerPeerExist(GraphSpaceID spaceId, PartitionID partId) override; @@ -181,54 +341,168 @@ class MemPartManager final : public PartManager { meta::RemoteListeners remoteListeners_; }; +/** + * @brief Part manager based on meta client and server, all interfaces will read from meta client + * cache or meta server + */ class MetaServerBasedPartManager : public PartManager, public meta::MetaChangedListener { public: + /** + * @brief Construct a new part manager based on meta + * + * @param host Local address + * @param client Meta client + */ explicit MetaServerBasedPartManager(HostAddr host, meta::MetaClient* client = nullptr); ~MetaServerBasedPartManager(); + /** + * @brief return part allocation for a host + * + * @param host + * @return meta::PartsMap Data part allocation of all spaces on the host + */ meta::PartsMap parts(const HostAddr& host) override; + /** + * @brief Return all peers of a given partition + * + * @param spaceId + * @param partId + * @return StatusOr Return peers of a partition if succeeded, else return a error + * status + */ StatusOr partMeta(GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Check current part exist or
not on host. + * + * @param host + * @param spaceId + * @param partId + * @return Status + */ Status partExist(const HostAddr& host, GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Check host has the space exist or not. + * + * @param host + * @param spaceId + * @return Status + */ Status spaceExist(const HostAddr& host, GraphSpaceID spaceId) override; + /** + * @brief Return the listener allocation of a host + * + * @param host + * @return meta::ListenersMap Listener allocation of all spaces on the host + */ meta::ListenersMap listeners(const HostAddr& host) override; + /** + * @brief Return remote listener info if given partition has any listener + * + * @param spaceId + * @param partId + * @return StatusOr> Remote listener information + */ StatusOr> listenerPeerExist(GraphSpaceID spaceId, PartitionID partId) override; - + // Following methods implement the interfaces in MetaChangedListener /** - * Implement the interfaces in MetaChangedListener - * */ + * @brief Found a new space, call handler's method + * + * @param spaceId + * @param isListener Whether the space is a listener + */ void onSpaceAdded(GraphSpaceID spaceId, bool isListener) override; + /** + * @brief Found a removed space, call handler's method + * + * @param spaceId + * @param isListener Whether the space is a listener + */ void onSpaceRemoved(GraphSpaceID spaceId, bool isListener) override; + /** + * @brief Found space option updated, call handler's method + * + * @param spaceId + * @param options Options map + */ void onSpaceOptionUpdated(GraphSpaceID spaceId, const std::unordered_map& options) override; + /** + * @brief Found a new part, call handler's method + * + * @param partMeta Partition's id and peers + */ void onPartAdded(const meta::PartHosts& partMeta) override; + /** + * @brief Found a removed part, call handler's method + * + * @param spaceId + * @param partId + */ void onPartRemoved(GraphSpaceID spaceId, PartitionID partId) override; + /** + * @brief Found a
part updated, call handler's method + * + * @param partMeta Partition's id and peers + */ void onPartUpdated(const meta::PartHosts& partMeta) override; + /** + * @brief Fetch leader distribution from handler + * + * @param leaderParts Leader distribution + */ void fetchLeaderInfo( std::unordered_map>& leaderParts) override; + /** + * @brief Fetch disk and partition relation + * + * @param diskParts Partition allocation grouped by disk + */ void fetchDiskParts(SpaceDiskPartsMap& diskParts) override; + /** + * @brief Found a new listener, call handler's method + * + * @param spaceId + * @param partId + * @param listenerHosts Listener's peer + */ void onListenerAdded(GraphSpaceID spaceId, PartitionID partId, const meta::ListenerHosts& listenerHosts) override; + /** + * @brief Found a removed listener, call handler's method + * + * @param spaceId + * @param partId + * @param type Listener type + */ void onListenerRemoved(GraphSpaceID spaceId, PartitionID partId, meta::cpp2::ListenerType type) override; + /** + * @brief Check if a partition has remote listeners, add or remove if necessary + * + * @param spaceId + * @param partId + * @param remoteListeners Remote listener infos + */ void onCheckRemoteListeners(GraphSpaceID spaceId, PartitionID partId, const std::vector& remoteListeners) override; diff --git a/src/kvstore/RateLimiter.h b/src/kvstore/RateLimiter.h index 3af482cdcbd..6e03ec8da5b 100644 --- a/src/kvstore/RateLimiter.h +++ b/src/kvstore/RateLimiter.h @@ -28,8 +28,15 @@ class RateLimiter { bucket_.reset(new folly::DynamicTokenBucket(static_cast(now + waitInSec))); } - // Caller must make sure the **the partition has been add, and won't be removed during consume.** - // Snapshot and rebuild index follow this principle by design. + /** + * @brief Consume some budget from rate limiter. Caller must make sure **the partition has + * been added, and won't be removed during consume.** Snapshot and rebuild index follow this + * principle by design.
+ * + * @param toConsume Amount to consume + * @param rate Generate speed + * @param burstSize Maximum consume speed to consume + */ void consume(double toConsume, double rate, double burstSize) { if (toConsume > burstSize) { // consumeWithBorrowAndWait do nothing when toConsume > burstSize_, we sleep 1s instead diff --git a/src/kvstore/RocksEngine.cpp b/src/kvstore/RocksEngine.cpp index 6cd542c00a7..43e92cca92f 100644 --- a/src/kvstore/RocksEngine.cpp +++ b/src/kvstore/RocksEngine.cpp @@ -171,7 +171,7 @@ nebula::cpp2::ErrorCode RocksEngine::commitBatchWrite(std::unique_ptr keyValues) { if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - VLOG(3) << "MultiPut Failed: " << status.ToString(); + VLOG(4) << "MultiPut Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -326,7 +326,7 @@ nebula::cpp2::ErrorCode RocksEngine::remove(const std::string& key) { if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - VLOG(3) << "Remove Failed: " << key << status.ToString(); + VLOG(4) << "Remove Failed: " << key << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -342,7 +342,7 @@ nebula::cpp2::ErrorCode RocksEngine::multiRemove(std::vector keys) if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - VLOG(3) << "MultiRemove Failed: " << status.ToString(); + VLOG(4) << "MultiRemove Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -354,7 +354,7 @@ nebula::cpp2::ErrorCode RocksEngine::removeRange(const std::string& start, const if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - VLOG(3) << "RemoveRange Failed: " << status.ToString(); + VLOG(4) << "RemoveRange Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -423,7 +423,7 @@ nebula::cpp2::ErrorCode RocksEngine::ingest(const std::vector& file if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << "Ingest Failed: " 
<< status.ToString(); + LOG(WARNING) << "Ingest Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -437,7 +437,7 @@ nebula::cpp2::ErrorCode RocksEngine::setOption(const std::string& configKey, LOG(INFO) << "SetOption Succeeded: " << configKey << ":" << configValue; return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << "SetOption Failed: " << configKey << ":" << configValue; + LOG(WARNING) << "SetOption Failed: " << configKey << ":" << configValue; return nebula::cpp2::ErrorCode::E_INVALID_PARM; } } @@ -451,7 +451,7 @@ nebula::cpp2::ErrorCode RocksEngine::setDBOption(const std::string& configKey, LOG(INFO) << "SetDBOption Succeeded: " << configKey << ":" << configValue; return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << "SetDBOption Failed: " << configKey << ":" << configValue; + LOG(WARNING) << "SetDBOption Failed: " << configKey << ":" << configValue; return nebula::cpp2::ErrorCode::E_INVALID_PARM; } } @@ -474,7 +474,7 @@ nebula::cpp2::ErrorCode RocksEngine::compact() { if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << "CompactAll Failed: " << status.ToString(); + LOG(WARNING) << "CompactAll Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -485,7 +485,7 @@ nebula::cpp2::ErrorCode RocksEngine::flush() { if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << "Flush Failed: " << status.ToString(); + LOG(WARNING) << "Flush Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_UNKNOWN; } } @@ -500,7 +500,7 @@ nebula::cpp2::ErrorCode RocksEngine::backup() { if (status.ok()) { return nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << "backup failed: " << status.ToString(); + LOG(WARNING) << "backup failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_BACKUP_FAILED; } } @@ -528,7 +528,8 @@ void RocksEngine::openBackupEngine(GraphSpaceID spaceId) { if 
(!FLAGS_rocksdb_wal_dir.empty()) { walDir = folly::stringPrintf("%s/rocksdb_wal/%d", FLAGS_rocksdb_wal_dir.c_str(), spaceId); } else { - LOG(WARNING) << "rocksdb wal is stored with data"; + LOG(WARNING) << "rocksdb wal is stored with data. If data_path is on tmpfs, the wal is " + "volatile as well"; } rocksdb::RestoreOptions restoreOptions; @@ -550,21 +551,21 @@ void RocksEngine::openBackupEngine(GraphSpaceID spaceId) { nebula::cpp2::ErrorCode RocksEngine::createCheckpoint(const std::string& checkpointPath) { LOG(INFO) << "Target checkpoint data path : " << checkpointPath; if (fs::FileUtils::exist(checkpointPath) && !fs::FileUtils::remove(checkpointPath.data(), true)) { - LOG(ERROR) << "Remove exist checkpoint data dir failed: " << checkpointPath; + LOG(WARNING) << "Remove exist checkpoint data dir failed: " << checkpointPath; return nebula::cpp2::ErrorCode::E_STORE_FAILURE; } rocksdb::Checkpoint* checkpoint; rocksdb::Status status = rocksdb::Checkpoint::Create(db_.get(), &checkpoint); if (!status.ok()) { - LOG(ERROR) << "Init checkpoint Failed: " << status.ToString(); + LOG(WARNING) << "Init checkpoint Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_FAILED_TO_CHECKPOINT; } std::unique_ptr cp(checkpoint); status = cp->CreateCheckpoint(checkpointPath, 0); if (!status.ok()) { - LOG(ERROR) << "Create checkpoint Failed: " << status.ToString(); + LOG(WARNING) << "Create checkpoint Failed: " << status.ToString(); return nebula::cpp2::ErrorCode::E_FAILED_TO_CHECKPOINT; } return nebula::cpp2::ErrorCode::SUCCEEDED; @@ -582,7 +583,7 @@ ErrorOr RocksEngine::backupTable( auto parent = backupPath.substr(0, backupPath.rfind('/')); if (!FileUtils::exist(parent)) { if (!FileUtils::makeDir(parent)) { - LOG(ERROR) << "Make dir " << parent << " failed"; + LOG(WARNING) << "Make dir " << parent << " failed"; return nebula::cpp2::ErrorCode::E_BACKUP_FAILED; } } @@ -601,7 +602,7 @@ ErrorOr RocksEngine::backupTable( rocksdb::SstFileWriter 
sstFileWriter(rocksdb::EnvOptions(), options); auto s = sstFileWriter.Open(backupPath); if (!s.ok()) { - LOG(ERROR) << "BackupTable failed, path: " << backupPath << ", error: " << s.ToString(); + LOG(WARNING) << "BackupTable failed, path: " << backupPath << ", error: " << s.ToString(); return nebula::cpp2::ErrorCode::E_BACKUP_TABLE_FAILED; } @@ -612,7 +613,7 @@ ErrorOr RocksEngine::backupTable( s = sstFileWriter.Put(iter->key().toString(), iter->val().toString()); if (!s.ok()) { - LOG(ERROR) << "BackupTable failed, path: " << backupPath << ", error: " << s.ToString(); + LOG(WARNING) << "BackupTable failed, path: " << backupPath << ", error: " << s.ToString(); sstFileWriter.Finish(); return nebula::cpp2::ErrorCode::E_BACKUP_TABLE_FAILED; } diff --git a/src/kvstore/RocksEngine.h b/src/kvstore/RocksEngine.h index 16f5ba0216f..0fb9ecf2c11 100644 --- a/src/kvstore/RocksEngine.h +++ b/src/kvstore/RocksEngine.h @@ -19,6 +19,9 @@ namespace nebula { namespace kvstore { +/** + * @brief Rocksdb range iterator, only scan data in range [start, end) + */ class RocksRangeIter : public KVIterator { public: RocksRangeIter(rocksdb::Iterator* iter, rocksdb::Slice start, rocksdb::Slice end) @@ -52,6 +55,9 @@ class RocksRangeIter : public KVIterator { rocksdb::Slice end_; }; +/** + * @brief Rocksdb prefix iterator, only scan data starts with prefix + */ class RocksPrefixIter : public KVIterator { public: RocksPrefixIter(rocksdb::Iterator* iter, rocksdb::Slice prefix) : iter_(iter), prefix_(prefix) {} @@ -83,6 +89,9 @@ class RocksPrefixIter : public KVIterator { rocksdb::Slice prefix_; }; +/** + * @brief Rocksdb iterator to scan all data + */ class RocksCommonIter : public KVIterator { public: explicit RocksCommonIter(rocksdb::Iterator* iter) : iter_(iter) {} @@ -113,15 +122,25 @@ class RocksCommonIter : public KVIterator { std::unique_ptr iter_; }; -/************************************************************************** - * - * An implementation of KVEngine based on Rocksdb +/** + 
* @brief An implementation of KVEngine based on Rocksdb * - *************************************************************************/ + */ class RocksEngine : public KVEngine { FRIEND_TEST(RocksEngineTest, SimpleTest); public: + /** + * @brief Construct a new rocksdb instance + * + * @param spaceId + * @param vIdLen Vertex id length, used for prefix bloom filter + * @param dataPath Rocksdb data path + * @param walPath Rocksdb wal path + * @param mergeOp Rocksdb merge operation + * @param cfFactory Rocksdb compaction filter factory + * @param readonly Whether start as read only instance + */ RocksEngine(GraphSpaceID spaceId, int32_t vIdLen, const std::string& dataPath, @@ -136,26 +155,56 @@ class RocksEngine : public KVEngine { void stop() override; - // return path to a spaceId, e.g. "/DataPath/nebula/spaceId", usually it should - // contain two subdir: data and wal. + /** + * @brief Return path to a spaceId, e.g. "/DataPath/nebula/spaceId", usually it should contain two + * subdir: data and wal.
+ */ const char* getDataRoot() const override { return dataPath_.c_str(); } + /** + * @brief Return the wal path + */ const char* getWalRoot() const override { return walPath_.c_str(); } + /** + * @brief Get the rocksdb snapshot + * + * @return const void* Pointer of rocksdb snapshot + */ const void* GetSnapshot() override { return db_->GetSnapshot(); } + /** + * @brief Release the given rocksdb snapshot + * + * @param snapshot Pointer of rocksdb snapshot to release + */ void ReleaseSnapshot(const void* snapshot) override { db_->ReleaseSnapshot(reinterpret_cast(snapshot)); } + /** + * @brief return a WriteBatch object to do batch operation + * + * @return std::unique_ptr + */ std::unique_ptr startBatchWrite() override; + /** + * @brief write the batch operation into kv engine + * + * @param batch WriteBatch object + * @param disableWAL Whether wal is disabled, only used in rocksdb + * @param sync Whether need to sync when write, only used in rocksdb + * @param wait Whether wait until write result, rocksdb would return incomplete if wait is false + * in certain scenario + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode commitBatchWrite(std::unique_ptr batch, bool disableWAL, bool sync, @@ -164,86 +213,264 @@ class RocksEngine : public KVEngine { /********************* * Data retrieval ********************/ + /** + * @brief Read a single key + * + * @param key Key to read + * @param value Pointer of value + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode get(const std::string& key, std::string* value) override; + /** + * @brief Read a list of keys + * + * @param keys Keys to read + * @param values Pointers of value + * @return std::vector Result status of each key, if key[i] does not exist, the i-th value + * in return value would be Status::KeyNotFound + */ std::vector multiGet(const std::vector& keys, std::vector* values) override; + /** + * @brief Get all results in range [start, end) + * + * @param start Start key, inclusive + *
@param end End key, exclusive + * @param iter Iterator in range [start, end) + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode range(const std::string& start, const std::string& end, std::unique_ptr* iter) override; + /** + * @brief Get all results with 'prefix' str as prefix. + * + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode prefix(const std::string& prefix, std::unique_ptr* iter, const void* snapshot = nullptr) override; + /** + * @brief Get all results with 'prefix' str as prefix starting from 'start' + * + * @param start Start key, inclusive + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' beginning from 'start' + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode rangeWithPrefix(const std::string& start, const std::string& prefix, std::unique_ptr* iter) override; + /** + * @brief Prefix scan with prefix extractor + * + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode prefixWithExtractor(const std::string& prefix, const void* snapshot, std::unique_ptr* storageIter); + /** + * @brief Prefix scan without prefix extractor, use total order seek + * + * @param prefix The prefix of keys to iterate + * @param iter Iterator of keys starts with 'prefix' + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode prefixWithoutExtractor(const std::string& prefix, const void* snapshot, std::unique_ptr* storageIter); - nebula::cpp2::ErrorCode scan(std::unique_ptr* storageIter) override; + /** + * @brief Scan all data in rocksdb + * + * @param iter Iterator of rocksdb + * @return nebula::cpp2::ErrorCode + */ + nebula::cpp2::ErrorCode scan(std::unique_ptr* iter) override; + /********************* * Data modification ********************/ + /** + * @brief Write
a single record + * + * @param key Key to write + * @param value Value to write + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode put(std::string key, std::string value) override; + /** + * @brief Write a batch of records + * + * @param keyValues Key-value pairs to write + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode multiPut(std::vector keyValues) override; + /** + * @brief Remove a single key + * + * @param key Key to remove + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode remove(const std::string& key) override; + /** + * @brief Remove a batch of keys + * + * @param keys Keys to remove + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode multiRemove(std::vector keys) override; + /** + * @brief Remove key in range [start, end) + * + * @param start Start key, inclusive + * @param end End key, exclusive + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode removeRange(const std::string& start, const std::string& end) override; /********************* * Non-data operation ********************/ + + /** + * @brief Write the part key into rocksdb for persistence + * + * @param partId + */ void addPart(PartitionID partId) override; + /** + * @brief Remove the part key from rocksdb + * + * @param partId + */ void removePart(PartitionID partId) override; + /** + * @brief Return all partitions in rocksdb instance by scanning system part key + * + * @return std::vector Partition ids + */ std::vector allParts() override; + /** + * @brief Return total partition numbers + */ int32_t totalPartsNum() override; + /** + * @brief Ingest external sst files + * + * @param files SST file path + * @param verifyFileChecksum Whether verify sst check-sum during ingestion + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode ingest(const std::vector& files, bool verifyFileChecksum = false) override; + /** + * @brief Set config option + * + * @param configKey Config name + * @param configValue Config value + *
@return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode setOption(const std::string& configKey, const std::string& configValue) override; + /** + * @brief Set DB config option + * + * @param configKey Config name + * @param configValue Config value + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode setDBOption(const std::string& configKey, const std::string& configValue) override; + /** + * @brief Get engine property + * + * @param property Config name + * @return ErrorOr + */ ErrorOr getProperty(const std::string& property) override; + /** + * @brief Do data compaction in lsm tree + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode compact() override; + /** + * @brief Flush data in memtable into sst + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode flush() override; + /** + * @brief Call rocksdb backup, mainly for rocksdb PlainTable mounted on tmpfs/ramfs + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode backup() override; /********************* * Checkpoint operation ********************/ + /** + * @brief Create a rocksdb check point + * + * @param checkpointPath + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode createCheckpoint(const std::string& checkpointPath) override; + /** + * @brief Backup the data of a table prefix, for meta backup + * + * @param path KV engine path + * @param tablePrefix Table prefix + * @param filter Data filter when iterate the table + * @return ErrorOr> Return the sst file path if + * succeed, else return ErrorCode + */ ErrorOr backupTable( const std::string& path, const std::string& tablePrefix, std::function filter) override; private: + /** + * @brief System part key, indicate which partitions in rocksdb instance + * + * @param partId + * @return std::string + */ std::string partKey(PartitionID partId); + /** + * @brief Open the rocksdb backup engine, mainly for rocksdb PlainTable mounted on tmpfs/ramfs + * + * @param spaceId + */ void
openBackupEngine(GraphSpaceID spaceId); private: diff --git a/src/kvstore/RocksEngineConfig.h b/src/kvstore/RocksEngineConfig.h index 404da00a18e..ffb428dac5e 100644 --- a/src/kvstore/RocksEngineConfig.h +++ b/src/kvstore/RocksEngineConfig.h @@ -70,12 +70,30 @@ DECLARE_uint64(rocksdb_kv_separation_threshold); namespace nebula { namespace kvstore { +/** + * @brief Build rocksdb options from gflags + * + * @param baseOpts Rocksdb options + * @param spaceId + * @param vidLen + * @return rocksdb::Status + */ rocksdb::Status initRocksdbOptions(rocksdb::Options &baseOpts, GraphSpaceID spaceId, int32_t vidLen = 8); +/** + * @brief Load a gflag into map + * + * @param map + * @param gflags + * @return Return succeeded or not + */ bool loadOptionsMap(std::unordered_map &map, const std::string &gflags); +/** + * @brief Retrieve rocksdb statistics, return nullptr if not enabled + */ std::shared_ptr getDBStatistics(); } // namespace kvstore diff --git a/src/kvstore/plugins/CMakeLists.txt b/src/kvstore/plugins/CMakeLists.txt index aef92163b2b..46e0ce6d6a2 100644 --- a/src/kvstore/plugins/CMakeLists.txt +++ b/src/kvstore/plugins/CMakeLists.txt @@ -1,2 +1 @@ -nebula_add_subdirectory(hbase) nebula_add_subdirectory(elasticsearch) diff --git a/src/kvstore/plugins/elasticsearch/ESListener.cpp b/src/kvstore/plugins/elasticsearch/ESListener.cpp index 3f40ed8d9bc..a4cacbc2196 100644 --- a/src/kvstore/plugins/elasticsearch/ESListener.cpp +++ b/src/kvstore/plugins/elasticsearch/ESListener.cpp @@ -125,7 +125,7 @@ bool ESListener::writeAppliedId(LogID lastId, TermID lastTerm, LogID lastApplyLo auto raw = encodeAppliedId(lastId, lastTerm, lastApplyLogId); ssize_t written = write(fd, raw.c_str(), raw.size()); if (written != (ssize_t)raw.size()) { - VLOG(3) << idStr_ << "bytesWritten:" << written << ", expected:" << raw.size() + VLOG(4) << idStr_ << "bytesWritten:" << written << ", expected:" << raw.size() << ", error:" << strerror(errno); close(fd); return false; @@ -212,7 +212,7 @@ bool
ESListener::writeData(const std::vector& items) co if (isNeedWriteOneByOne) { return writeDatum(items); } - LOG(ERROR) << "A fatal error . Full-text engine is not working."; + LOG(WARNING) << idStr_ << "Failed to bulk into es."; return false; } @@ -237,7 +237,7 @@ bool ESListener::writeDatum(const std::vector& items) c } if (!done) { // means CURL fails, and no need to take the next step - LOG(ERROR) << "A fatal error . Full-text engine is not working."; + LOG(INFO) << idStr_ << "Failed to put into es."; return false; } } diff --git a/src/kvstore/plugins/elasticsearch/ESListener.h b/src/kvstore/plugins/elasticsearch/ESListener.h index 8631643c251..77fa8803d0f 100644 --- a/src/kvstore/plugins/elasticsearch/ESListener.h +++ b/src/kvstore/plugins/elasticsearch/ESListener.h @@ -17,6 +17,21 @@ using nebula::plugin::DocItem; class ESListener : public Listener { public: + /** + * @brief Construct a new ES Listener, it is a derived class of Listener + * + * @param spaceId + * @param partId + * @param localAddr Listener ip/addr + * @param walPath Listener's wal path + * @param ioPool IOThreadPool for listener + * @param workers Background thread for listener + * @param handlers Worker thread for listener + * @param snapshotMan Snapshot manager + * @param clientMan Client manager + * @param diskMan Disk manager + * @param schemaMan Schema manager + */ ESListener(GraphSpaceID spaceId, PartitionID partId, HostAddr localAddr, @@ -45,33 +60,112 @@ class ESListener : public Listener { } protected: + /** + * @brief Init work: get vid length, get es client + */ void init() override; + /** + * @brief Send data by es client + * + * @param data Key/value to apply + * @return True if succeed. False if failed. 
+ */ bool apply(const std::vector& data) override; + /** + * @brief Persist commitLogId commitLogTerm and lastApplyLogId + */ bool persist(LogID lastId, TermID lastTerm, LogID lastApplyLogId) override; + /** + * @brief Get commit log id and commit log term from persistence storage, called in start() + * + * @return std::pair + */ std::pair lastCommittedLogId() override; + /** + * @brief Get last apply id from persistence storage, used in initialization + * + * @return LogID Last apply log id + */ LogID lastApplyLogId() override; private: + /** + * @brief Write last commit id, last commit term, last apply id to a file + * + * @param lastId Last commit id + * @param lastTerm Last commit term + * @param lastApplyLogId Last apply id + * @return Whether persist succeed + */ bool writeAppliedId(LogID lastId, TermID lastTerm, LogID lastApplyLogId); + /** + * @brief Encode last commit id, last commit term, last apply id into a string + * + * @param lastId Last commit id + * @param lastTerm Last commit term + * @param lastApplyLogId Last apply id + * @return Encoded string + */ std::string encodeAppliedId(LogID lastId, TermID lastTerm, LogID lastApplyLogId) const noexcept; + /** + * @brief Convert key value to DocItem + * + * @param items DocItems to send + * @param kv Key/value to encode into DocItems + * @return Whether append DocItem succeed + */ bool appendDocItem(std::vector& items, const KV& kv) const; + /** + * @brief Convert edge key value to DocItem + * + * @param items DocItems to send + * @param kv Edge key/value to encode into DocItems + * @return Whether append DocItem succeed + */ bool appendEdgeDocItem(std::vector& items, const KV& kv) const; + /** + * @brief Convert tag key value to DocItem + * + * @param items DocItems to send + * @param kv Tag key/value to encode into DocItems + * @return Whether append DocItem succeed + */ bool appendTagDocItem(std::vector& items, const KV& kv) const; + /** + * @brief Add the fulltext index field to DocItem + * + * @param
items DocItems to send + * @param reader Key/value's reader + * @param fti Fulltext index schema + * @return Whether append DocItem succeed + */ bool appendDocs(std::vector& items, RowReader* reader, const std::pair& fti) const; + /** + * @brief Bulk DocItem to es + * + * @param items DocItems to send + * @return Whether send succeed + */ bool writeData(const std::vector& items) const; + /** + * @brief Put DocItem to es + * + * @param items DocItems to send + * @return Whether send succeed + */ bool writeDatum(const std::vector& items) const; private: diff --git a/src/kvstore/plugins/hbase/CMakeLists.txt b/src/kvstore/plugins/hbase/CMakeLists.txt deleted file mode 100644 index a16bb45f1e9..00000000000 --- a/src/kvstore/plugins/hbase/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -set(THRIFT1 ${Fbthrift_BIN}) -set(THRIFT_TEMPLATES ${Fbthrift_INCLUDE_DIR}/thrift/templates) -include(ThriftGenerate) - -# Target object name : hbase_thrift_obj -thrift_generate("hbase" "THBaseService" ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} "hbase") - -nebula_add_library( - hbasestore_obj OBJECT - HBaseStore.cpp - HBaseClient.cpp -) - -# HBase things will be removed later. -# nebula_add_subdirectory(test) - -add_custom_target( - clean-hbase - COMMAND "rm" "-fr" "gen-cpp2" "gen-java" "gen-go" "gen-py" -) diff --git a/src/kvstore/plugins/hbase/HBaseClient.cpp b/src/kvstore/plugins/hbase/HBaseClient.cpp deleted file mode 100644 index 6e870bead63..00000000000 --- a/src/kvstore/plugins/hbase/HBaseClient.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#include "kvstore/plugins/hbase/HBaseClient.h" - -#include -#include -#include -#include -#include - -#include "common/base/Base.h" -#include "common/network/NetworkUtils.h" - -namespace nebula { -namespace kvstore { - -const char* kColumnFamilyName = "cf"; - -HBaseClient::HBaseClient(const HostAddr& host) { - clientsMan_ = std::make_shared>(); - auto evb = folly::EventBaseManager::get()->getEventBase(); - client_ = clientsMan_->client(host, evb, true); -} - -HBaseClient::~HBaseClient() {} - -ResultCode HBaseClient::get(const std::string& tableName, const std::string& rowKey, KVMap& data) { - TGet tGet; - tGet.set_row(rowKey); - std::vector tColumnList; - TColumn tColumn; - tColumn.set_family(kColumnFamilyName); - tColumnList.emplace_back(tColumn); - tGet.set_columns(tColumnList); - - TResult tResult; - try { - client_->sync_get(tResult, tableName, tGet); - std::vector tColumnValueList = tResult.columnValues; - if (tColumnValueList.size() > 0) { - for (auto& cv : tColumnValueList) { - data.emplace(cv.qualifier, cv.value); - } - return ResultCode::SUCCEEDED; - } else { - return ResultCode::ERR_KEY_NOT_FOUND; - } - } catch (const TIOError& ex) { - LOG(ERROR) << "TIOError: " << ex.message; - return ResultCode::ERR_IO_ERROR; - } catch (const apache::thrift::transport::TTransportException& tte) { - LOG(ERROR) << "TTransportException: " << tte.what(); - return ResultCode::ERR_IO_ERROR; - } - return ResultCode::E_UNKNOWN; -} - -std::pair> HBaseClient::multiGet( - const std::string& tableName, - const std::vector& rowKeys, - std::vector>& dataList) { - std::vector tGetList; - for (auto& rowKey : rowKeys) { - TGet tGet; - tGet.set_row(rowKey); - std::vector tColumnList; - TColumn tColumn; - tColumn.set_family(kColumnFamilyName); - tColumnList.emplace_back(tColumn); - tGet.set_columns(tColumnList); - tGetList.emplace_back(tGet); - } - - std::vector tResultList; - std::vector status; - ResultCode resultCode = ResultCode::SUCCEEDED; - try { - 
client_->sync_getMultiple(tResultList, tableName, tGetList); - for (auto& tResult : tResultList) { - std::vector tColumnValueList = tResult.columnValues; - if (tColumnValueList.size() > 0) { - std::string rowKey = tResult.row; - KVMap data; - for (auto& cv : tColumnValueList) { - data.emplace(cv.qualifier, cv.value); - } - dataList.emplace_back(std::make_pair(rowKey, std::move(data))); - status.emplace_back(Status::OK()); - } else { - resultCode = ResultCode::ERR_PARTIAL_RESULT; - status.emplace_back(Status::KeyNotFound()); - } - } - return {resultCode, status}; - } catch (const TIOError& ex) { - LOG(ERROR) << "TIOError: " << ex.message; - return {ResultCode::ERR_IO_ERROR, status}; - } catch (const apache::thrift::transport::TTransportException& tte) { - LOG(ERROR) << "TTransportException: " << tte.what(); - return {ResultCode::ERR_IO_ERROR, status}; - } - return {ResultCode::E_UNKNOWN, status}; -} - -ResultCode HBaseClient::put(const std::string& tableName, - std::string& rowKey, - std::vector& data) { - TPut tPut; - tPut.set_row(rowKey); - std::vector tColumnValueList; - for (auto& kv : data) { - TColumnValue tColumnValue; - tColumnValue.set_family(kColumnFamilyName); - tColumnValue.set_qualifier(kv.first); - tColumnValue.set_value(kv.second); - tColumnValueList.emplace_back(tColumnValue); - } - tPut.set_columnValues(tColumnValueList); - - try { - client_->sync_put(tableName, tPut); - return ResultCode::SUCCEEDED; - } catch (const TIOError& ex) { - LOG(ERROR) << "TIOError: " << ex.message; - return ResultCode::ERR_IO_ERROR; - } catch (const apache::thrift::transport::TTransportException& tte) { - LOG(ERROR) << "TTransportException: " << tte.what(); - return ResultCode::ERR_IO_ERROR; - } - return ResultCode::E_UNKNOWN; -} - -ResultCode HBaseClient::multiPut(const std::string& tableName, - std::vector>>& dataList) { - std::vector tPutList; - for (auto& data : dataList) { - TPut tPut; - tPut.set_row(data.first); - auto kvs = data.second; - std::vector 
tColumnValueList; - for (auto& kv : kvs) { - TColumnValue tColumnValue; - tColumnValue.set_family(kColumnFamilyName); - tColumnValue.set_qualifier(kv.first); - tColumnValue.set_value(kv.second); - tColumnValueList.emplace_back(tColumnValue); - } - tPut.set_columnValues(tColumnValueList); - tPutList.emplace_back(tPut); - } - - try { - client_->sync_putMultiple(tableName, tPutList); - return ResultCode::SUCCEEDED; - } catch (const TIOError& ex) { - LOG(ERROR) << "TIOError: " << ex.message; - return ResultCode::ERR_IO_ERROR; - } catch (const apache::thrift::transport::TTransportException& tte) { - LOG(ERROR) << "TTransportException: " << tte.what(); - return ResultCode::ERR_IO_ERROR; - } - return ResultCode::E_UNKNOWN; -} - -ResultCode HBaseClient::range(const std::string& tableName, - const std::string& startRowKey, - const std::string& endRowKey, - std::vector>& dataList) { - // TODO(zhangguoqing) This is a simple implementation that get all results - // immediately, and in the future, it will use HBaseScanIter to improve - // performance. 
- TScan tScan; - tScan.set_startRow(startRowKey); - tScan.set_stopRow(endRowKey); - std::vector tColumnList; - TColumn tColumn; - tColumn.set_family(kColumnFamilyName); - tColumnList.emplace_back(tColumn); - tScan.set_columns(tColumnList); - tScan.set_caching(kScanRowNum * kScanRowNum); - - int32_t scannerId = -1; - try { - scannerId = client_->sync_openScanner(tableName, tScan); - while (true) { - std::vector tResultList; - client_->sync_getScannerRows(tResultList, scannerId, kScanRowNum); - if (tResultList.empty()) break; - for (auto& tResult : tResultList) { - std::vector tColumnValueList = tResult.columnValues; - if (tColumnValueList.size() > 0) { - std::string rowKey = tResult.row; - KVMap data; - for (auto& cv : tColumnValueList) { - data.emplace(cv.qualifier, cv.value); - } - dataList.emplace_back(std::make_pair(rowKey, std::move(data))); - } - } - tResultList.clear(); - } - client_->sync_closeScanner(scannerId); - return ResultCode::SUCCEEDED; - } catch (const TIOError& ex) { - if (scannerId >= 0) client_->sync_closeScanner(scannerId); - LOG(ERROR) << "TIOError: " << ex.message; - return ResultCode::ERR_IO_ERROR; - } catch (const apache::thrift::transport::TTransportException& tte) { - if (scannerId >= 0) client_->sync_closeScanner(scannerId); - LOG(ERROR) << "TTransportException: " << tte.what(); - return ResultCode::ERR_IO_ERROR; - } - return ResultCode::E_UNKNOWN; -} - -ResultCode HBaseClient::remove(const std::string& tableName, const std::string& rowKey) { - TDelete tDelete; - tDelete.set_row(rowKey); - TColumn tColumn; - tColumn.set_family(kColumnFamilyName); - std::vector tColumnList; - tColumnList.emplace_back(tColumn); - tDelete.set_columns(tColumnList); - tDelete.set_durability(TDurability::ASYNC_WAL); - - try { - client_->sync_deleteSingle(tableName, tDelete); - return ResultCode::SUCCEEDED; - } catch (const TIOError& ex) { - LOG(ERROR) << "TIOError: " << ex.message; - return ResultCode::ERR_IO_ERROR; - } catch (const 
apache::thrift::transport::TTransportException& tte) { - LOG(ERROR) << "TTransportException: " << tte.what(); - return ResultCode::ERR_IO_ERROR; - } - return ResultCode::E_UNKNOWN; -} - -ResultCode HBaseClient::multiRemove(const std::string& tableName, - std::vector& rowKeys) { - std::vector tDeleteList; - for (auto& rowKey : rowKeys) { - TDelete tDelete; - tDelete.set_row(rowKey); - TColumn tColumn; - tColumn.set_family(kColumnFamilyName); - std::vector tColumnList; - tColumnList.emplace_back(tColumn); - tDelete.set_columns(tColumnList); - tDelete.set_durability(TDurability::ASYNC_WAL); - tDeleteList.emplace_back(tDelete); - } - - std::vector tDeleteResultList; - try { - client_->sync_deleteMultiple(tDeleteResultList, tableName, tDeleteList); - return ResultCode::SUCCEEDED; - } catch (const TIOError& ex) { - LOG(ERROR) << "TIOError: " << ex.message; - return ResultCode::ERR_IO_ERROR; - } catch (const apache::thrift::transport::TTransportException& tte) { - LOG(ERROR) << "TTransportException: " << tte.what(); - return ResultCode::ERR_IO_ERROR; - } - return ResultCode::E_UNKNOWN; -} - -} // namespace kvstore -} // namespace nebula diff --git a/src/kvstore/plugins/hbase/HBaseClient.h b/src/kvstore/plugins/hbase/HBaseClient.h deleted file mode 100644 index 372db1b30cd..00000000000 --- a/src/kvstore/plugins/hbase/HBaseClient.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#ifndef KVSTORE_PLUGINS_HBASE_HBASECLIENT_H_ -#define KVSTORE_PLUGINS_HBASE_HBASECLIENT_H_ - -#include - -#include "common/base/Base.h" -#include "common/base/ErrorOr.h" -#include "common/base/Status.h" -#include "common/thrift/ThriftClientManager.h" -#include "gen-cpp2/THBaseServiceAsyncClient.h" -#include "gen-cpp2/hbase_types.h" -#include "kvstore/Common.h" - -namespace nebula { -namespace kvstore { - -using namespace apache::hadoop::hbase::thrift2::cpp2; // NOLINT - -class HBaseClient final { - public: - explicit HBaseClient(const HostAddr& host); - ~HBaseClient(); - - ResultCode get(const std::string& tableName, const std::string& rowKey, KVMap& data); - - std::pair> multiGet( - const std::string& tableName, - const std::vector& rowKeys, - std::vector>& dataList); - - ResultCode put(const std::string& tableName, std::string& rowKey, std::vector& data); - - ResultCode multiPut(const std::string& tableName, - std::vector>>& dataList); - - ResultCode range(const std::string& tableName, - const std::string& start, - const std::string& end, - std::vector>& dataList); - - ResultCode remove(const std::string& tableName, const std::string& rowKey); - - ResultCode multiRemove(const std::string& tableName, std::vector& rowKeys); - - private: - std::shared_ptr> clientsMan_; - - std::shared_ptr client_; - - const int32_t kScanRowNum = {1024}; -}; - -} // namespace kvstore -} // namespace nebula -#endif // KVSTORE_PLUGINS_HBASE_HBASECLIENT_H_ diff --git a/src/kvstore/plugins/hbase/hbase.thrift b/src/kvstore/plugins/hbase/hbase.thrift deleted file mode 100644 index 710a325127d..00000000000 --- a/src/kvstore/plugins/hbase/hbase.thrift +++ /dev/null @@ -1,622 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// NOTE: The "required" and "optional" keywords for the service methods are purely for documentation - -namespace java org.apache.hadoop.hbase.thrift2.generated -namespace cpp apache.hadoop.hbase.thrift2 -namespace rb Apache.Hadoop.Hbase.Thrift2 -namespace py hbase -namespace perl Hbase - -struct TTimeRange { - 1: required i64 minStamp, - 2: required i64 maxStamp -} - -/** - * Addresses a single cell or multiple cells - * in a HBase table by column family and optionally - * a column qualifier and timestamp - */ -struct TColumn { - 1: required binary family, - 2: optional binary qualifier, - 3: optional i64 timestamp -} - -/** - * Represents a single cell and its value. - */ -struct TColumnValue { - 1: required binary family, - 2: required binary qualifier, - 3: required binary value, - 4: optional i64 timestamp, - 5: optional binary tags -} - -/** - * Represents a single cell and the amount to increment it by - */ -struct TColumnIncrement { - 1: required binary family, - 2: required binary qualifier, - 3: optional i64 amount = 1 -} - -/** - * if no Result is found, row and columnValues will not be set. - */ -struct TResult { - 1: optional binary row, - 2: required list columnValues -} - -/** - * Specify type of delete: - * - DELETE_COLUMN means exactly one version will be removed, - * - DELETE_COLUMNS means previous versions will also be removed. 
- */ -enum TDeleteType { - DELETE_COLUMN = 0, - DELETE_COLUMNS = 1 -} - -/** - * Specify Durability: - * - SKIP_WAL means do not write the Mutation to the WAL. - * - ASYNC_WAL means write the Mutation to the WAL asynchronously, - * - SYNC_WAL means write the Mutation to the WAL synchronously, - * - FSYNC_WAL means Write the Mutation to the WAL synchronously and force the entries to disk. - */ - -enum TDurability { - SKIP_WAL = 1, - ASYNC_WAL = 2, - SYNC_WAL = 3, - FSYNC_WAL = 4 -} -struct TAuthorization { - 1: optional list labels -} - -struct TCellVisibility { - 1: optional string expression -} -/** - * Used to perform Get operations on a single row. - * - * The scope can be further narrowed down by specifying a list of - * columns or column families. - * - * To get everything for a row, instantiate a Get object with just the row to get. - * To further define the scope of what to get you can add a timestamp or time range - * with an optional maximum number of versions to return. - * - * If you specify a time range and a timestamp the range is ignored. - * Timestamps on TColumns are ignored. - */ -struct TGet { - 1: required binary row, - 2: optional list columns, - - 3: optional i64 timestamp, - 4: optional TTimeRange timeRange, - - 5: optional i32 maxVersions, - 6: optional binary filterString, - 7: optional map attributes - 8: optional TAuthorization authorizations -} - -/** - * Used to perform Put operations for a single row. - * - * Add column values to this object and they'll be added. - * You can provide a default timestamp if the column values - * don't have one. If you don't provide a default timestamp - * the current time is inserted. - * - * You can specify how this Put should be written to the write-ahead Log (WAL) - * by changing the durability. If you don't provide durability, it defaults to - * column family's default setting for durability. 
- */ -struct TPut { - 1: required binary row, - 2: required list columnValues - 3: optional i64 timestamp, - 5: optional map attributes, - 6: optional TDurability durability, - 7: optional TCellVisibility cellVisibility -} - -/** - * Used to perform Delete operations on a single row. - * - * The scope can be further narrowed down by specifying a list of - * columns or column families as TColumns. - * - * Specifying only a family in a TColumn will delete the whole family. - * If a timestamp is specified all versions with a timestamp less than - * or equal to this will be deleted. If no timestamp is specified the - * current time will be used. - * - * Specifying a family and a column qualifier in a TColumn will delete only - * this qualifier. If a timestamp is specified only versions equal - * to this timestamp will be deleted. If no timestamp is specified the - * most recent version will be deleted. To delete all previous versions, - * specify the DELETE_COLUMNS TDeleteType. - * - * The top level timestamp is only used if a complete row should be deleted - * (i.e. no columns are passed) and if it is specified it works the same way - * as if you had added a TColumn for every column family and this timestamp - * (i.e. all versions older than or equal in all column families will be deleted) - * - * You can specify how this Delete should be written to the write-ahead Log (WAL) - * by changing the durability. If you don't provide durability, it defaults to - * column family's default setting for durability. - */ -struct TDelete { - 1: required binary row, - 2: optional list columns, - 3: optional i64 timestamp, - 4: optional TDeleteType deleteType = 1, - 6: optional map attributes, - 7: optional TDurability durability - -} - -/** - * Used to perform Increment operations for a single row. - * - * You can specify how this Increment should be written to the write-ahead Log (WAL) - * by changing the durability. 
If you don't provide durability, it defaults to - * column family's default setting for durability. - */ -struct TIncrement { - 1: required binary row, - 2: required list columns, - 4: optional map attributes, - 5: optional TDurability durability - 6: optional TCellVisibility cellVisibility -} - -/* - * Used to perform append operation - */ -struct TAppend { - 1: required binary row, - 2: required list columns, - 3: optional map attributes, - 4: optional TDurability durability - 5: optional TCellVisibility cellVisibility -} - -/** - * Any timestamps in the columns are ignored but the colFamTimeRangeMap included, use timeRange to select by timestamp. - * Max versions defaults to 1. - */ -struct TScan { - 1: optional binary startRow, - 2: optional binary stopRow, - 3: optional list columns - 4: optional i32 caching, - 5: optional i32 maxVersions=1, - 6: optional TTimeRange timeRange, - 7: optional binary filterString, - 8: optional i32 batchSize, - 9: optional map attributes - 10: optional TAuthorization authorizations - 11: optional bool reversed - 12: optional bool cacheBlocks - 13: optional map colFamTimeRangeMap - 14: optional bool small -} - -/** - * Atomic mutation for the specified row. It can be either Put or Delete. - */ -union TMutation { - 1: TPut put, - 2: TDelete deleteSingle, -} - -/** - * A TRowMutations object is used to apply a number of Mutations to a single row. 
- */ -struct TRowMutations { - 1: required binary row - 2: required list mutations -} - -struct THRegionInfo { - 1: required i64 regionId - 2: required binary tableName - 3: optional binary startKey - 4: optional binary endKey - 5: optional bool offline - 6: optional bool split - 7: optional i32 replicaId -} - -struct TServerName { - 1: required string hostName - 2: optional i32 port - 3: optional i64 startCode -} - -struct THRegionLocation { - 1: required TServerName serverName - 2: required THRegionInfo regionInfo -} - -/** - * Thrift wrapper around - * org.apache.hadoop.hbase.filter.CompareFilter$CompareOp. - */ -enum TCompareOp { - LESS = 0, - LESS_OR_EQUAL = 1, - EQUAL = 2, - NOT_EQUAL = 3, - GREATER_OR_EQUAL = 4, - GREATER = 5, - NO_OP = 6 -} - - -// -// Exceptions -// - -/** - * A TIOError exception signals that an error occurred communicating - * to the HBase master or a HBase region server. Also used to return - * more general HBase error conditions. - */ -exception TIOError { - 1: optional string message -} - -/** - * A TIllegalArgument exception indicates an illegal or invalid - * argument was passed into a procedure. - */ -exception TIllegalArgument { - 1: optional string message -} - -service THBaseService { - - /** - * Test for the existence of columns in the table, as specified in the TGet. - * - * @return true if the specified TGet matches one or more keys, false if not - */ - bool exists( - /** the table to check on */ - 1: binary table, - - /** the TGet to check for */ - 2: TGet tget - ) throws (1:TIOError io) - - - /** - * Test for the existence of columns in the table, as specified by the TGets. - * - * This will return an array of booleans. Each value will be true if the related Get matches - * one or more keys, false if not. - */ - list existsAll( - /** the table to check on */ - 1: binary table, - - /** a list of TGets to check for */ - 2: list tgets - ) throws (1:TIOError io) - - /** - * Method for getting data from a row. 
- * - * If the row cannot be found an empty Result is returned. - * This can be checked by the empty field of the TResult - * - * @return the result - */ - TResult get( - /** the table to get from */ - 1: binary table, - - /** the TGet to fetch */ - 2: TGet tget - ) throws (1: TIOError io) - - /** - * Method for getting multiple rows. - * - * If a row cannot be found there will be a null - * value in the result list for that TGet at the - * same position. - * - * So the Results are in the same order as the TGets. - */ - list getMultiple( - /** the table to get from */ - 1: binary table, - - /** a list of TGets to fetch, the Result list - will have the Results at corresponding positions - or null if there was an error */ - 2: list tgets - ) throws (1: TIOError io) - - /** - * Commit a TPut to a table. - */ - void put( - /** the table to put data in */ - 1: binary table, - - /** the TPut to put */ - 2: TPut tput - ) throws (1: TIOError io) - - /** - * Atomically checks if a row/family/qualifier value matches the expected - * value. If it does, it adds the TPut. - * - * @return true if the new put was executed, false otherwise - */ - bool checkAndPut( - /** to check in and put to */ - 1: binary table, - - /** row to check */ - 2: binary row, - - /** column family to check */ - 3: binary family, - - /** column qualifier to check */ - 4: binary qualifier, - - /** the expected value, if not provided the - check is for the nonexistence of the - column in question */ - 5: binary value, - - /** the TPut to put if the check succeeds */ - 6: TPut tput - ) throws (1: TIOError io) - - /** - * Commit a List of Puts to the table. - */ - void putMultiple( - /** the table to put data in */ - 1: binary table, - - /** a list of TPuts to commit */ - 2: list tputs - ) throws (1: TIOError io) - - /** - * Deletes as specified by the TDelete. - * - * Note: "delete" is a reserved keyword and cannot be used in Thrift - * thus the inconsistent naming scheme from the other functions. 
- */ - void deleteSingle( - /** the table to delete from */ - 1: binary table, - - /** the TDelete to delete */ - 2: TDelete tdelete - ) throws (1: TIOError io) - - /** - * Bulk commit a List of TDeletes to the table. - * - * Throws a TIOError if any of the deletes fail. - * - * Always returns an empty list for backwards compatibility. - */ - list deleteMultiple( - /** the table to delete from */ - 1: binary table, - - /** list of TDeletes to delete */ - 2: list tdeletes - ) throws (1: TIOError io) - - /** - * Atomically checks if a row/family/qualifier value matches the expected - * value. If it does, it adds the delete. - * - * @return true if the new delete was executed, false otherwise - */ - bool checkAndDelete( - /** to check in and delete from */ - 1: binary table, - - /** row to check */ - 2: binary row, - - /** column family to check */ - 3: binary family, - - /** column qualifier to check */ - 4: binary qualifier, - - /** the expected value, if not provided the - check is for the nonexistence of the - column in question */ - 5: binary value, - - /** the TDelete to execute if the check succeeds */ - 6: TDelete tdelete - ) throws (1: TIOError io) - - TResult increment( - /** the table to increment the value on */ - 1: binary table, - - /** the TIncrement to increment */ - 2: TIncrement tincrement - ) throws (1: TIOError io) - - TResult append( - /** the table to append the value on */ - 1: binary table, - - /** the TAppend to append */ - 2: TAppend tappend - ) throws (1: TIOError io) - - /** - * Get a Scanner for the provided TScan object. - * - * @return Scanner Id to be used with other scanner procedures - */ - i32 openScanner( - /** the table to get the Scanner for */ - 1: binary table, - - /** the scan object to get a Scanner for */ - 2: TScan tscan, - ) throws (1: TIOError io) - - /** - * Grabs multiple rows from a Scanner. - * - * @return Between zero and numRows TResults - */ - list getScannerRows( - /** the Id of the Scanner to return rows from. 
This is an Id returned from the openScanner function. */ - 1: i32 scannerId, - - /** number of rows to return */ - 2: i32 numRows = 1 - ) throws ( - 1: TIOError io, - - /** if the scannerId is invalid */ - 2: TIllegalArgument ia - ) - - /** - * Closes the scanner. Should be called to free server side resources timely. - * Typically close once the scanner is not needed anymore, i.e. after looping - * over it to get all the required rows. - */ - void closeScanner( - /** the Id of the Scanner to close **/ - 1: i32 scannerId - ) throws ( - 1: TIOError io, - - /** if the scannerId is invalid */ - 2: TIllegalArgument ia - ) - - /** - * mutateRow performs multiple mutations atomically on a single row. - */ - void mutateRow( - /** table to apply the mutations */ - 1: binary table, - - /** mutations to apply */ - 2: TRowMutations trowMutations - ) throws (1: TIOError io) - - /** - * Get results for the provided TScan object. - * This helper function opens a scanner, get the results and close the scanner. - * - * @return between zero and numRows TResults - */ - list getScannerResults( - /** the table to get the Scanner for */ - 1: binary table, - - /** the scan object to get a Scanner for */ - 2: TScan tscan, - - /** number of rows to return */ - 3: i32 numRows = 1 - ) throws ( - 1: TIOError io - ) - - /** - * Given a table and a row get the location of the region that - * would contain the given row key. - * - * reload = true means the cache will be cleared and the location - * will be fetched from meta. - */ - THRegionLocation getRegionLocation( - 1: binary table, - 2: binary row, - 3: bool reload, - ) throws ( - 1: TIOError io - ) - - /** - * Get all of the region locations for a given table. - **/ - list getAllRegionLocations( - 1: binary table, - ) throws ( - 1: TIOError io - ) - - /** - * Atomically checks if a row/family/qualifier value matches the expected - * value. If it does, it mutates the row. 
- * - * @return true if the row was mutated, false otherwise - */ - bool checkAndMutate( - /** to check in and delete from */ - 1: binary table, - - /** row to check */ - 2: binary row, - - /** column family to check */ - 3: binary family, - - /** column qualifier to check */ - 4: binary qualifier, - - /** comparison to make on the value */ - 5: TCompareOp compareOp, - - /** the expected value to be compared against, if not provided the - check is for the nonexistence of the column in question */ - 6: binary value, - - /** row mutations to execute if the value matches */ - 7: TRowMutations rowMutations - ) throws (1: TIOError io) -} diff --git a/src/kvstore/plugins/hbase/test/CMakeLists.txt b/src/kvstore/plugins/hbase/test/CMakeLists.txt deleted file mode 100644 index 8ad5c3f7f12..00000000000 --- a/src/kvstore/plugins/hbase/test/CMakeLists.txt +++ /dev/null @@ -1,62 +0,0 @@ -nebula_add_test( - DISABLED - NAME - hbase_client_test - SOURCES - HBaseClientTest.cpp - OBJECTS - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - LIBRARIES - ${ROCKSDB_LIBRARIES} - ${THRIFT_LIBRARIES} - wangle - gtest -) - -nebula_add_test( - DISABLED - NAME - hbase_store_test - SOURCES - HBaseStoreTest.cpp - OBJECTS - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - LIBRARIES - ${ROCKSDB_LIBRARIES} - ${THRIFT_LIBRARIES} - wangle - gtest -) diff --git a/src/kvstore/plugins/hbase/test/HBaseClientTest.cpp b/src/kvstore/plugins/hbase/test/HBaseClientTest.cpp deleted file mode 100644 index 1bcd5cde870..00000000000 --- a/src/kvstore/plugins/hbase/test/HBaseClientTest.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include - -#include "common/base/Base.h" -#include "common/network/NetworkUtils.h" -#include "kvstore/plugins/hbase/HBaseClient.h" - -/** - * TODO(zhangguoqing) Add a test runner to provide HBase/thrift2 service. 
- * hbase/bin/hbase-daemon.sh start thrift2 -b 127.0.0.1 -p 9096 - * hbase(main):001:0> create 'Nebula_Graph_Space_0', 'cf' - * */ - -namespace nebula { -namespace kvstore { - -TEST(HBaseClientTest, SimpleTest) { - auto hbaseClient = std::make_shared(HostAddr(0, 9096)); - std::string tableName = "Nebula_Graph_Space_0"; - std::string rowKey = "rowKey"; - std::vector putData; - for (int32_t i = 0; i < 10; i++) { - putData.emplace_back(folly::stringPrintf("col_%d", i), folly::stringPrintf("val_%d", i)); - } - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->put(tableName, rowKey, putData)); - - KVMap getData; - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->get(tableName, rowKey, getData)); - EXPECT_EQ(10, getData.size()); - for (int32_t i = 0; i < 10; i++) { - auto kvIt = getData.find(folly::stringPrintf("col_%d", i)); - EXPECT_TRUE(kvIt != getData.end()); - if (kvIt != getData.end()) { - EXPECT_EQ(kvIt->second, folly::stringPrintf("val_%d", i)); - } - } - - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->remove(tableName, rowKey)); - EXPECT_EQ(ResultCode::ERR_KEY_NOT_FOUND, hbaseClient->get(tableName, rowKey, getData)); -} - -TEST(HBaseClientTest, MultiTest) { - auto hbaseClient = std::make_shared(HostAddr(0, 9096)); - std::string tableName = "Nebula_Graph_Space_0"; - std::vector rowKeys; - std::vector>> dataList; - for (int32_t i = 10; i < 20; i++) { - std::string rowKey = "rowKey_" + folly::to(i); - rowKeys.emplace_back(rowKey); - std::vector putData; - for (int32_t j = 0; j < 10; j++) { - putData.emplace_back(folly::stringPrintf("col_%d", j), folly::stringPrintf("val_%d", j)); - } - dataList.emplace_back(rowKey, putData); - } - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->multiPut(tableName, dataList)); - - std::vector> retDataList; - auto ret = hbaseClient->multiGet(tableName, rowKeys, retDataList); - EXPECT_EQ(ResultCode::SUCCEEDED, ret.first); - EXPECT_EQ(10, retDataList.size()); - for (size_t index = 0; index < retDataList.size(); index++) { - 
EXPECT_EQ(rowKeys[index], retDataList[index].first); - EXPECT_EQ(10, retDataList[index].second.size()); - for (int32_t i = 0; i < 10; i++) { - auto kvIt = retDataList[index].second.find(folly::stringPrintf("col_%d", i)); - EXPECT_TRUE(kvIt != retDataList[index].second.end()); - if (kvIt != retDataList[index].second.end()) { - EXPECT_EQ(kvIt->second, folly::stringPrintf("val_%d", i)); - } - } - } - - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->multiRemove(tableName, rowKeys)); - retDataList.clear(); - ret = hbaseClient->multiGet(tableName, rowKeys, retDataList); - EXPECT_EQ(ResultCode::E_UNKNOWN, ret.first); - EXPECT_EQ(0, retDataList.size()); -} - -TEST(HBaseClientTest, RangeTest) { - auto hbaseClient = std::make_shared(HostAddr(0, 9096)); - std::string tableName = "Nebula_Graph_Space_0"; - std::vector rowKeys; - std::vector>> dataList; - for (int32_t i = 10; i < 20; i++) { - std::string rowKey = "rowKey_" + folly::to(i); - rowKeys.emplace_back(rowKey); - std::vector putData; - putData.emplace_back("col", "val"); - dataList.emplace_back(rowKey, putData); - } - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->multiPut(tableName, dataList)); - - auto checkRange = [&](int32_t start, int32_t end, int32_t expectedFrom, int32_t expectedTotal) { - LOG(INFO) << "start " << start << ", end " << end << ", expectedFrom " << expectedFrom - << ", expectedTotal " << expectedTotal; - std::string s = "rowKey_" + folly::to(start); - std::string e = "rowKey_" + folly::to(end); - std::vector> retDataList; - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->range(tableName, s, e, retDataList)); - - int32_t num = 0; - for (auto& retData : retDataList) { - EXPECT_EQ("rowKey_" + folly::to(expectedFrom + num), retData.first); - EXPECT_EQ(1, retData.second.size()); - auto kvIt = retData.second.find("col"); - EXPECT_TRUE(kvIt != retData.second.end()); - if (kvIt != retData.second.end()) { - EXPECT_EQ(kvIt->second, "val"); - } - num++; - } - EXPECT_EQ(expectedTotal, num); - }; - - checkRange(10, 
20, 10, 10); - checkRange(1, 50, 10, 10); - checkRange(15, 18, 15, 3); - checkRange(15, 23, 15, 5); - checkRange(1, 15, 10, 5); - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseClient->multiRemove(tableName, rowKeys)); -} - -} // namespace kvstore -} // namespace nebula - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - folly::init(&argc, &argv, true); - google::SetStderrLogging(google::INFO); - - return RUN_ALL_TESTS(); -} diff --git a/src/kvstore/plugins/hbase/test/HBaseStoreTest.cpp b/src/kvstore/plugins/hbase/test/HBaseStoreTest.cpp deleted file mode 100644 index cf62f839f22..00000000000 --- a/src/kvstore/plugins/hbase/test/HBaseStoreTest.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include - -#include "common/base/Base.h" -#include "common/utils/NebulaKeyUtils.h" -#include "kvstore/plugins/hbase/HBaseStore.h" -#include "kvstore/plugins/hbase/test/TestUtils.h" - -/** - * TODO(zhangguoqing) Add a test runner to provide HBase/thrift2 service. 
- * hbase/bin/hbase-daemon.sh start thrift2 -b 127.0.0.1 -p 9096 - * hbase(main):001:0> create 'Nebula_Graph_Space_0', 'cf' - * */ - -namespace nebula { -namespace kvstore { - -TEST(HBaseStoreTest, SimpleTest) { - KVOptions options; - auto schemaMan = TestUtils::mockSchemaMan(); - auto sm = schemaMan.get(); - CHECK_NOTNULL(sm); - options.hbaseServer_ = HostAddr(0, 9096); - options.schemaMan_ = schemaMan.get(); - auto hbaseStore = std::make_unique(std::move(options)); - hbaseStore->init(); - - LOG(INFO) << "Put some data then read them..."; - GraphSpaceID spaceId = 0; - PartitionID partId = 0; - VertexID srcId = 10L, dstId = 20L; - EdgeType edgeType = 101; - EdgeRanking rank = 10L; - EdgeVerPlaceHolder edgeVersion = 1; - std::vector edgeKeys; - std::vector edgeData; - auto edgeSchema = sm->getEdgeSchema(spaceId, edgeType, edgeVersion); - for (auto vertexId = srcId; vertexId < dstId; vertexId++) { - auto edgeKey = NebulaKeyUtils::edgeKey(partId, srcId, edgeType, rank, vertexId); - edgeKeys.emplace_back(edgeKey); - RowWriter edgeWriter(edgeSchema); - for (int32_t iInt = 0; iInt < 10; iInt++) { - edgeWriter << iInt; - } - for (int32_t iString = 10; iString < 20; iString++) { - edgeWriter << folly::stringPrintf("string_col_%d", iString); - } - auto edgeValue = edgeWriter.encode(); - edgeData.emplace_back(edgeKey, edgeValue); - } - - edgeSchema = sm->getEdgeSchema(spaceId, edgeType + 1, edgeVersion); - for (; edgeVersion < 10L; edgeVersion++) { - auto edgeKey = NebulaKeyUtils::edgeKey(partId, srcId, edgeType + 1, rank, dstId); - edgeKeys.emplace_back(edgeKey); - RowWriter edgeWriter(edgeSchema); - for (int32_t iInt = 0; iInt < 5; iInt++) { - edgeWriter << iInt; - } - for (int32_t iString = 5; iString < 10; iString++) { - edgeWriter << folly::stringPrintf("string_col_%d", iString); - } - auto edgeValue = edgeWriter.encode(); - edgeData.emplace_back(edgeKey, edgeValue); - } - - hbaseStore->asyncMultiPut( - spaceId, partId, edgeData, [](ResultCode code) { 
EXPECT_EQ(ResultCode::SUCCEEDED, code); }); - - std::vector retEdgeValues; - auto ret = hbaseStore->multiGet(spaceId, partId, edgeKeys, &retEdgeValues); - EXPECT_EQ(ResultCode::SUCCEEDED, ret.first); - EXPECT_EQ(20, retEdgeValues.size()); - - auto checkPrefix = [&](const std::string& prefix, int32_t expectedFrom, int32_t expectedTotal) { - LOG(INFO) << "prefix " << prefix << ", expectedFrom " << expectedFrom << ", expectedTotal " - << expectedTotal; - std::unique_ptr iter; - EXPECT_EQ(ResultCode::SUCCEEDED, hbaseStore->prefix(spaceId, partId, prefix, &iter)); - int num = 0; - while (iter->valid()) { - num++; - std::string key(iter->key()); - EXPECT_EQ(edgeKeys[expectedFrom], key); - std::string val(iter->val()); - EXPECT_EQ(edgeData[expectedFrom].second, val); - expectedFrom++; - iter->next(); - } - EXPECT_EQ(expectedTotal, num); - }; - std::string prefix1 = NebulaKeyUtils::tagPrefix(partId, srcId); - checkPrefix(prefix1, 0, 20); - std::string prefix2 = NebulaKeyUtils::edgePrefix(partId, srcId, edgeType); - checkPrefix(prefix2, 0, 10); - std::string prefix3 = NebulaKeyUtils::prefix(partId, srcId, edgeType + 1, rank, dstId); - checkPrefix(prefix3, 10, 10); - - hbaseStore->asyncRemovePrefix( - spaceId, partId, prefix3, [](ResultCode code) { EXPECT_EQ(ResultCode::SUCCEEDED, code); }); - - hbaseStore->asyncMultiRemove( - spaceId, partId, edgeKeys, [](ResultCode code) { EXPECT_EQ(ResultCode::SUCCEEDED, code); }); - - retEdgeValues.clear(); - ret = hbaseStore->multiGet(spaceId, partId, edgeKeys, &retEdgeValues); - EXPECT_EQ(ResultCode::E_UNKNOWN, ret.first); - EXPECT_EQ(0, retEdgeValues.size()); -} - -} // namespace kvstore -} // namespace nebula - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - folly::init(&argc, &argv, true); - google::SetStderrLogging(google::INFO); - - return RUN_ALL_TESTS(); -} diff --git a/src/kvstore/plugins/hbase/test/TestUtils.h b/src/kvstore/plugins/hbase/test/TestUtils.h deleted file mode 100644 index 
3260500e264..00000000000 --- a/src/kvstore/plugins/hbase/test/TestUtils.h +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright (c) 2019 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "common/base/Base.h" -#include "common/meta/SchemaManager.h" -#include "common/meta/SchemaProviderIf.h" -#include "storage/test/AdHocSchemaManager.h" - -namespace nebula { -namespace kvstore { - -class TestUtils { - public: - static std::unique_ptr mockSchemaMan() { - auto* schemaMan = new storage::AdHocSchemaManager(); - for (GraphSpaceID spaceId = 0; spaceId < 3; spaceId++) { - schemaMan->addEdgeSchema( - spaceId, 101 /*edge type*/, TestUtils::genEdgeSchemaProvider(10, 10)); - schemaMan->addEdgeSchema(spaceId, 102 /*edge type*/, TestUtils::genEdgeSchemaProvider(5, 5)); - for (auto tagId = 3001; tagId < 3010; tagId++) { - schemaMan->addTagSchema(spaceId, tagId, TestUtils::genTagSchemaProvider(tagId, 3, 3)); - } - } - std::unique_ptr sm(schemaMan); - return sm; - } - - private: - // It will generate SchemaProvider with some int fields and string fields - static std::shared_ptr genEdgeSchemaProvider(int32_t intFieldsNum, - int32_t stringFieldsNum) { - nebula::cpp2::Schema schema; - for (auto i = 0; i < intFieldsNum; i++) { - nebula::cpp2::ColumnDef column; - column.name = folly::stringPrintf("col_%d", i); - column.type.type = nebula::cpp2::SupportedType::INT; - schema.columns.emplace_back(std::move(column)); - } - for (auto i = intFieldsNum; i < intFieldsNum + stringFieldsNum; i++) { - nebula::cpp2::ColumnDef column; - column.name = folly::stringPrintf("col_%d", i); - column.type.type = nebula::cpp2::SupportedType::STRING; - schema.columns.emplace_back(std::move(column)); - } - return std::make_shared(std::move(schema)); - } - - // It will generate tag SchemaProvider with some int fields and string fields - static std::shared_ptr genTagSchemaProvider(TagID tagId, - int32_t intFieldsNum, - int32_t stringFieldsNum) { - 
nebula::cpp2::Schema schema; - for (auto i = 0; i < intFieldsNum; i++) { - nebula::cpp2::ColumnDef column; - column.name = folly::stringPrintf("tag_%d_col_%d", tagId, i); - column.type.type = nebula::cpp2::SupportedType::INT; - schema.columns.emplace_back(std::move(column)); - } - for (auto i = intFieldsNum; i < intFieldsNum + stringFieldsNum; i++) { - nebula::cpp2::ColumnDef column; - column.name = folly::stringPrintf("tag_%d_col_%d", tagId, i); - column.type.type = nebula::cpp2::SupportedType::STRING; - schema.columns.emplace_back(std::move(column)); - } - return std::make_shared(std::move(schema)); - } -}; - -} // namespace kvstore -} // namespace nebula diff --git a/src/kvstore/raftex/Host.cpp b/src/kvstore/raftex/Host.cpp index f61c15585fc..685c70ebd28 100644 --- a/src/kvstore/raftex/Host.cpp +++ b/src/kvstore/raftex/Host.cpp @@ -40,18 +40,18 @@ void Host::waitForStop() { CHECK(stopped_); noMoreRequestCV_.wait(g, [this] { return !requestOnGoing_; }); - LOG(INFO) << idStr_ << "The host has been stopped!"; + VLOG(1) << idStr_ << "The host has been stopped!"; } nebula::cpp2::ErrorCode Host::canAppendLog() const { CHECK(!lock_.try_lock()); if (stopped_) { - VLOG(2) << idStr_ << "The host is stopped, just return"; + VLOG(3) << idStr_ << "The host is stopped, just return"; return nebula::cpp2::ErrorCode::E_RAFT_HOST_STOPPED; } if (paused_) { - VLOG(2) << idStr_ << "The host is paused, due to losing leadership"; + VLOG(3) << idStr_ << "The host is paused, due to losing leadership"; return nebula::cpp2::ErrorCode::E_RAFT_HOST_PAUSED; } return nebula::cpp2::ErrorCode::SUCCEEDED; @@ -62,7 +62,7 @@ folly::Future Host::askForVote(const cpp2::AskForVoteR { std::lock_guard g(lock_); if (stopped_) { - VLOG(2) << idStr_ << "The Host is not in a proper status, do not send"; + VLOG(3) << idStr_ << "The Host is not in a proper status, do not send"; cpp2::AskForVoteResponse resp; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_HOST_STOPPED; return resp; @@ -78,7 +78,7 @@ 
folly::Future Host::appendLogs(folly::EventBase* eb, LogID committedLogId, TermID prevLogTerm, LogID prevLogId) { - VLOG(3) << idStr_ << "Entering Host::appendLogs()"; + VLOG(4) << idStr_ << "Entering Host::appendLogs()"; auto ret = folly::Future::makeEmpty(); std::shared_ptr req; @@ -88,7 +88,7 @@ folly::Future Host::appendLogs(folly::EventBase* eb, auto res = canAppendLog(); if (UNLIKELY(sendingSnapshot_)) { - LOG_EVERY_N(INFO, 500) << idStr_ << "The target host is waiting for a snapshot"; + VLOG_EVERY_N(2, 1000) << idStr_ << "The target host is waiting for a snapshot"; res = nebula::cpp2::ErrorCode::E_RAFT_WAITING_SNAPSHOT; } else if (requestOnGoing_) { // buffer incoming request to pendingReq_ @@ -96,27 +96,27 @@ folly::Future Host::appendLogs(folly::EventBase* eb, pendingReq_ = std::make_tuple(term, logId, committedLogId); return cachingPromise_.getFuture(); } else { - LOG_EVERY_N(INFO, 200) << idStr_ << "Too many requests are waiting, return error"; + VLOG_EVERY_N(2, 1000) << idStr_ << "Too many requests are waiting, return error"; res = nebula::cpp2::ErrorCode::E_RAFT_TOO_MANY_REQUESTS; } } if (res != nebula::cpp2::ErrorCode::SUCCEEDED) { - VLOG(2) << idStr_ << "The host is not in a proper status, just return"; + VLOG(3) << idStr_ << "The host is not in a proper status, just return"; cpp2::AppendLogResponse r; r.error_code_ref() = res; return r; } - VLOG(2) << idStr_ << "About to send the AppendLog request"; + VLOG(4) << idStr_ << "About to send the AppendLog request"; // No request is ongoing, let's send a new request if (UNLIKELY(lastLogIdSent_ == 0 && lastLogTermSent_ == 0)) { lastLogIdSent_ = prevLogId; lastLogTermSent_ = prevLogTerm; - LOG(INFO) << idStr_ << "This is the first time to send the logs to this host" - << ", lastLogIdSent = " << lastLogIdSent_ - << ", lastLogTermSent = " << lastLogTermSent_; + VLOG(2) << idStr_ << "This is the first time to send the logs to this host" + << ", lastLogIdSent = " << lastLogIdSent_ + << ", lastLogTermSent = " << 
lastLogTermSent_; } logTermToSend_ = term; logIdToSend_ = logId; @@ -124,8 +124,8 @@ folly::Future Host::appendLogs(folly::EventBase* eb, auto result = prepareAppendLogRequest(); if (ok(result)) { - LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Sending the pending request in the queue" - << ", from " << lastLogIdSent_ + 1 << " to " << logIdToSend_; + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "Sending the pending request in the queue" + << ", from " << lastLogIdSent_ + 1 << " to " << logIdToSend_; req = std::move(value(result)); pendingReq_ = std::make_tuple(0, 0, 0); promise_ = std::move(cachingPromise_); @@ -161,7 +161,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptridStr_ << "AppendLogResponse " << "code " << apache::thrift::util::enumNameSafe(resp.get_error_code()) << ", currTerm " << resp.get_current_term() << ", lastLogTerm " << resp.get_last_matched_log_term() @@ -171,7 +171,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptridStr_ << "AppendLog request sent successfully"; + VLOG(3) << self->idStr_ << "AppendLog request sent successfully"; std::shared_ptr newReq; { @@ -189,7 +189,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptrfollowerCommittedLogId_ = resp.get_committed_log_id(); if (self->lastLogIdSent_ < self->logIdToSend_) { // More to send - VLOG(2) << self->idStr_ << "There are more logs to send"; + VLOG(3) << self->idStr_ << "There are more logs to send"; auto result = self->prepareAppendLogRequest(); if (ok(result)) { newReq = std::move(value(result)); @@ -217,9 +217,9 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptridStr_ << "Failed to append logs to the host (Err: " - << apache::thrift::util::enumNameSafe(resp.get_error_code()) << ")"; + VLOG_EVERY_N(2, 1000) << self->idStr_ << "Failed to append logs to the host (Err: " + << apache::thrift::util::enumNameSafe(resp.get_error_code()) + << ")"; { std::lock_guard g(self->lock_); self->setResponse(resp); @@ -230,13 
+230,13 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptr{}, [self = shared_from_this(), req](TransportException&& ex) { - VLOG(2) << self->idStr_ << ex.what(); + VLOG(4) << self->idStr_ << ex.what(); cpp2::AppendLogResponse r; r.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_RPC_EXCEPTION; { std::lock_guard g(self->lock_); if (ex.getType() == TransportException::TIMED_OUT) { - LOG_IF(INFO, FLAGS_trace_raft) + VLOG_IF(1, FLAGS_trace_raft) << self->idStr_ << "append log time out" << ", space " << req->get_space() << ", part " << req->get_part() << ", current term " << req->get_current_term() << ", committed_id " @@ -252,7 +252,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptr{}, [self = shared_from_this()](std::exception&& ex) { - VLOG(2) << self->idStr_ << ex.what(); + VLOG(4) << self->idStr_ << ex.what(); cpp2::AppendLogResponse r; r.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_RPC_EXCEPTION; { @@ -267,7 +267,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptr> Host::prepareAppendLogRequest() { CHECK(!lock_.try_lock()); - VLOG(2) << idStr_ << "Prepare AppendLogs request from Log " << lastLogIdSent_ + 1 << " to " + VLOG(3) << idStr_ << "Prepare AppendLogs request from Log " << lastLogIdSent_ + 1 << " to " << logIdToSend_; auto makeReq = [this]() -> std::shared_ptr { @@ -294,10 +294,9 @@ Host::prepareAppendLogRequest() { } if (lastLogIdSent_ + 1 > part_->wal()->lastLogId()) { - LOG_IF(INFO, FLAGS_trace_raft) - << idStr_ << "My lastLogId in wal is " << part_->wal()->lastLogId() - << ", but you are seeking " << lastLogIdSent_ + 1 - << ", so i have nothing to send, logIdToSend_ = " << logIdToSend_; + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "My lastLogId in wal is " << part_->wal()->lastLogId() + << ", but you are seeking " << lastLogIdSent_ + 1 + << ", so i have nothing to send, logIdToSend_ = " << logIdToSend_; return nebula::cpp2::ErrorCode::E_RAFT_NO_WAL_FOUND; } @@ -315,7 +314,7 @@ 
Host::prepareAppendLogRequest() { // the last log entry's id is (lastLogIdSent_ + cnt), when iterator is invalid and last log // entry's id is not logIdToSend_, which means the log has been rollbacked if (!it->valid() && (lastLogIdSent_ + static_cast(logs.size()) != logIdToSend_)) { - LOG_IF(INFO, FLAGS_trace_raft) + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "Can't find log in wal, logIdToSend_ = " << logIdToSend_; return nebula::cpp2::ErrorCode::E_RAFT_NO_WAL_FOUND; } @@ -329,10 +328,10 @@ Host::prepareAppendLogRequest() { nebula::cpp2::ErrorCode Host::startSendSnapshot() { CHECK(!lock_.try_lock()); if (!sendingSnapshot_) { - LOG(INFO) << idStr_ << "Can't find log " << lastLogIdSent_ + 1 << " in wal, send the snapshot" - << ", logIdToSend = " << logIdToSend_ - << ", firstLogId in wal = " << part_->wal()->firstLogId() - << ", lastLogId in wal = " << part_->wal()->lastLogId(); + VLOG(1) << idStr_ << "Can't find log " << lastLogIdSent_ + 1 << " in wal, send the snapshot" + << ", logIdToSend = " << logIdToSend_ + << ", firstLogId in wal = " << part_->wal()->firstLogId() + << ", lastLogId in wal = " << part_->wal()->lastLogId(); sendingSnapshot_ = true; part_->snapshot_->sendSnapshot(part_, addr_) .thenValue([self = shared_from_this()](auto&& status) { @@ -342,44 +341,44 @@ nebula::cpp2::ErrorCode Host::startSendSnapshot() { self->lastLogIdSent_ = commitLogIdAndTerm.first; self->lastLogTermSent_ = commitLogIdAndTerm.second; self->followerCommittedLogId_ = commitLogIdAndTerm.first; - LOG(INFO) << self->idStr_ << "Send snapshot succeeded!" - << " commitLogId = " << commitLogIdAndTerm.first - << " commitLogTerm = " << commitLogIdAndTerm.second; + VLOG(1) << self->idStr_ << "Send snapshot succeeded!" 
+ << " commitLogId = " << commitLogIdAndTerm.first + << " commitLogTerm = " << commitLogIdAndTerm.second; } else { - LOG(INFO) << self->idStr_ << "Send snapshot failed!"; + VLOG(1) << self->idStr_ << "Send snapshot failed!"; // TODO(heng): we should tell the follower i am failed. } self->sendingSnapshot_ = false; self->noMoreRequestCV_.notify_all(); }); } else { - LOG_EVERY_N(INFO, 100) << idStr_ << "The snapshot req is in queue, please wait for a moment"; + VLOG_EVERY_N(2, 1000) << idStr_ << "The snapshot req is in queue, please wait for a moment"; } return nebula::cpp2::ErrorCode::E_RAFT_WAITING_SNAPSHOT; } folly::Future Host::sendAppendLogRequest( folly::EventBase* eb, std::shared_ptr req) { - VLOG(2) << idStr_ << "Entering Host::sendAppendLogRequest()"; + VLOG(4) << idStr_ << "Entering Host::sendAppendLogRequest()"; { std::lock_guard g(lock_); auto res = canAppendLog(); if (res != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(WARNING) << idStr_ << "The Host is not in a proper status, do not send"; + VLOG(3) << idStr_ << "The Host is not in a proper status, do not send"; cpp2::AppendLogResponse resp; resp.error_code_ref() = res; return resp; } } - LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Sending appendLog: space " << req->get_space() - << ", part " << req->get_part() << ", current term " - << req->get_current_term() << ", committed_id " - << req->get_committed_log_id() << ", last_log_term_sent " - << req->get_last_log_term_sent() << ", last_log_id_sent " - << req->get_last_log_id_sent() << ", logs in request " - << req->get_log_str_list().size(); + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "Sending appendLog: space " << req->get_space() + << ", part " << req->get_part() << ", current term " + << req->get_current_term() << ", committed_id " + << req->get_committed_log_id() << ", last_log_term_sent " + << req->get_last_log_term_sent() << ", last_log_id_sent " + << req->get_last_log_id_sent() << ", logs in request " + << req->get_log_str_list().size(); // Get 
client connection auto client = part_->clientMan_->client(addr_, eb, false, FLAGS_raft_rpc_timeout_ms); return client->future_appendLog(*req); @@ -402,7 +401,7 @@ folly::Future Host::sendHeartbeat( .via(eb) .then([self = shared_from_this(), pro = std::move(promise)](folly::Try&& t) mutable { - VLOG(3) << self->idStr_ << "heartbeat call got response"; + VLOG(4) << self->idStr_ << "heartbeat call got response"; if (t.hasException()) { cpp2::HeartbeatResponse resp; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_RPC_EXCEPTION; @@ -417,25 +416,25 @@ folly::Future Host::sendHeartbeat( folly::Future Host::sendHeartbeatRequest( folly::EventBase* eb, std::shared_ptr req) { - VLOG(2) << idStr_ << "Entering Host::sendHeartbeatRequest()"; + VLOG(4) << idStr_ << "Entering Host::sendHeartbeatRequest()"; { std::lock_guard g(lock_); auto res = canAppendLog(); if (res != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(WARNING) << idStr_ << "The Host is not in a proper status, do not send"; + VLOG(3) << idStr_ << "The Host is not in a proper status, do not send"; cpp2::HeartbeatResponse resp; resp.error_code_ref() = res; return resp; } } - LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Sending heartbeat: space " << req->get_space() - << ", part " << req->get_part() << ", current term " - << req->get_current_term() << ", committed_id " - << req->get_committed_log_id() << ", last_log_term_sent " - << req->get_last_log_term_sent() << ", last_log_id_sent " - << req->get_last_log_id_sent(); + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "Sending heartbeat: space " << req->get_space() + << ", part " << req->get_part() << ", current term " + << req->get_current_term() << ", committed_id " + << req->get_committed_log_id() << ", last_log_term_sent " + << req->get_last_log_term_sent() << ", last_log_id_sent " + << req->get_last_log_id_sent(); // Get client connection auto client = part_->clientMan_->client(addr_, eb, false, FLAGS_raft_rpc_timeout_ms); return client->future_heartbeat(*req); @@ 
-464,9 +463,9 @@ std::shared_ptr Host::getPendingReqIfAny(std::shared_ptr self->logIdToSend_ = std::get<1>(tup); self->committedLogId_ = std::get<2>(tup); - LOG_IF(INFO, FLAGS_trace_raft) << self->idStr_ << "Sending the pending request in the queue" - << ", from " << self->lastLogIdSent_ + 1 << " to " - << self->logIdToSend_; + VLOG_IF(1, FLAGS_trace_raft) << self->idStr_ << "Sending the pending request in the queue" + << ", from " << self->lastLogIdSent_ + 1 << " to " + << self->logIdToSend_; self->pendingReq_ = std::make_tuple(0, 0, 0); self->promise_ = std::move(self->cachingPromise_); self->cachingPromise_ = folly::SharedPromise(); diff --git a/src/kvstore/raftex/Host.h b/src/kvstore/raftex/Host.h index 41781c9e540..ef27f752d25 100644 --- a/src/kvstore/raftex/Host.h +++ b/src/kvstore/raftex/Host.h @@ -24,37 +24,64 @@ namespace raftex { class RaftPart; +/** + * @brief Host is a class to monitor how many log has been sent to a raft peer. It will send logs or + * start elelction to the remote peer by rpc + */ class Host final : public std::enable_shared_from_this { friend class RaftPart; public: + /** + * @brief Construct a new Host + * + * @param addr Target peer address + * @param part Related RaftPart + * @param isLearner Whether target is a learner + */ Host(const HostAddr& addr, std::shared_ptr part, bool isLearner = false); + /** + * @brief Destroy the Host + */ ~Host() { - LOG(INFO) << idStr_ << " The host has been destroyed!"; + VLOG(1) << idStr_ << " The host has been destroyed!"; } + /** + * @brief The str of the Host, used in logging + */ const char* idStr() const { return idStr_.c_str(); } - // This will be called when the shard lost its leadership + /** + * @brief This will be called when the RaftPart lost its leadership + */ void pause() { std::lock_guard g(lock_); paused_ = true; } - // This will be called when the shard becomes the leader + /** + * @brief This will be called when the RaftPart becomes the leader + */ void resume() { std::lock_guard 
g(lock_); paused_ = false; } + /** + * @brief This will be called when the RaftPart is stopped + */ void stop() { std::lock_guard g(lock_); stopped_ = true; } + /** + * @brief Reset all state, should be called when the RaftPart becomes the leader + */ void reset() { std::unique_lock g(lock_); noMoreRequestCV_.wait(g, [this] { return !requestOnGoing_; }); @@ -67,55 +94,135 @@ class Host final : public std::enable_shared_from_this { followerCommittedLogId_ = 0; } + /** + * @brief Wait for all requests in flight finish or timeout + */ void waitForStop(); + /** + * @brief Return whether the target peer is a raft learner + */ bool isLearner() const { return isLearner_; } + /** + * @brief Set the state in Host of a raft peer as learner + */ void setLearner(bool isLearner) { isLearner_ = isLearner; } + /** + * @brief Send the leader election rpc to the peer + * + * @param req The RPC request + * @param eb The eventbase to send rpc + * @return folly::Future + */ folly::Future askForVote(const cpp2::AskForVoteRequest& req, folly::EventBase* eb); - // When logId == lastLogIdSent, it is a heartbeat - folly::Future appendLogs( - folly::EventBase* eb, - TermID term, // Current term - LogID logId, // The last log to be sent - LogID committedLogId, // The last committed log id - TermID lastLogTermSent, // The last log term being sent - LogID lastLogIdSent); // The last log id being sent - + /** + * @brief Send the append log to the peer + * + * @param eb The eventbase to send rpc + * @param term The term of RaftPart + * @param logId The last log to be sent + * @param committedLogId The last committed log id + * @param lastLogTermSent The last log term being sent + * @param lastLogIdSent The last log id being sent + * @return folly::Future + */ + folly::Future appendLogs(folly::EventBase* eb, + TermID term, + LogID logId, + LogID committedLogId, + TermID lastLogTermSent, + LogID lastLogIdSent); + + /** + * @brief Send the heartbeat to the peer + * + * @param eb The eventbase to 
send rpc + * @param term The term of RaftPart + * @param logId The last log to be sent + * @param committedLogId The last committed log id + * @param lastLogTermSent The last log term being sent + * @param lastLogIdSent The last log id being sent + * @return folly::Future + */ folly::Future sendHeartbeat( folly::EventBase* eb, TermID term, LogID commitLogId, TermID lastLogTerm, LogID lastLogId); + /** + * @brief Return the peer address + */ const HostAddr& address() const { return addr_; } private: + /** + * @brief Whether Host can send rpc to the peer + */ nebula::cpp2::ErrorCode canAppendLog() const; + /** + * @brief Send append log rpc + * + * @param eb The eventbase to send rpc + * @param req The rpc request + * @return folly::Future + */ folly::Future sendAppendLogRequest( folly::EventBase* eb, std::shared_ptr req); + /** + * @brief Send the append log rpc and handle the response + * + * @param eb The eventbase to send rpc + * @param req The rpc request + */ void appendLogsInternal(folly::EventBase* eb, std::shared_ptr req); folly::Future sendHeartbeatRequest( folly::EventBase* eb, std::shared_ptr req); + /** + * @brief Build the append log request based on the log id + * + * @return ErrorOr> + */ ErrorOr> prepareAppendLogRequest(); + /** + * @brief Begin to start snapshot when we don't have the log in wal file + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode startSendSnapshot(); + /** + * @brief Return true if there isn't a request in flight + */ bool noRequest() const; - void setResponse(const cpp2::AppendLogResponse& r); - + /** + * @brief Notify the RaftPart the result of sending logs to peers + * + * @param resp RPC response + */ + void setResponse(const cpp2::AppendLogResponse& resp); + + /** + * @brief If there are more logs to send, build the append log request + * + * @param self Shared ptr of Host itself + * @return std::shared_ptr The request if there are logs to send, return + * nullptr there are none + */ std::shared_ptr 
getPendingReqIfAny(std::shared_ptr self); private: diff --git a/src/kvstore/raftex/RaftLogIterator.h b/src/kvstore/raftex/RaftLogIterator.h index 64d45c3b093..6e850c4b496 100644 --- a/src/kvstore/raftex/RaftLogIterator.h +++ b/src/kvstore/raftex/RaftLogIterator.h @@ -13,20 +13,49 @@ namespace nebula { namespace raftex { +/** + * @brief The wal log iterator used when follower received logs from leader by rpc + */ class RaftLogIterator final : public LogIterator { public: + /** + * @brief Construct a new raf log iterator + * + * @param firstLogId First log id in iterator + * @param logEntries Log entries from rpc request + */ RaftLogIterator(LogID firstLogId, std::vector logEntries); + /** + * @brief Move forward iterator to next log entry + * + * @return LogIterator& + */ RaftLogIterator& operator++() override; + /** + * @brief Return whether log iterator is valid + */ bool valid() const override; + /** + * @brief Return the log id pointed by current iterator + */ LogID logId() const override; + /** + * @brief Return the log term pointed by current iterator + */ TermID logTerm() const override; + /** + * @brief Return the log source pointed by current iterator + */ ClusterID logSource() const override; + /** + * @brief Return the log message pointed by current iterator + */ folly::StringPiece logMsg() const override; private: diff --git a/src/kvstore/raftex/RaftPart.cpp b/src/kvstore/raftex/RaftPart.cpp index 4d49bf60972..645e0b60fcd 100644 --- a/src/kvstore/raftex/RaftPart.cpp +++ b/src/kvstore/raftex/RaftPart.cpp @@ -210,7 +210,7 @@ RaftPart::RaftPart( PartitionID partId, HostAddr localAddr, const folly::StringPiece walRoot, - std::shared_ptr pool, + std::shared_ptr ioPool, std::shared_ptr workers, std::shared_ptr executor, std::shared_ptr snapshotMan, @@ -225,7 +225,7 @@ RaftPart::RaftPart( status_{Status::STARTING}, role_{Role::FOLLOWER}, leader_{"", 0}, - ioThreadPool_{pool}, + ioThreadPool_{ioPool}, bgWorkers_{workers}, executor_(executor), 
snapshot_(snapshotMan), @@ -256,7 +256,7 @@ RaftPart::~RaftPart() { // Make sure the partition has stopped CHECK(status_ == Status::STOPPED); - LOG(INFO) << idStr_ << " The part has been destroyed..."; + VLOG(1) << idStr_ << " The part has been destroyed..."; } const char* RaftPart::roleStr(Role role) const { @@ -290,21 +290,20 @@ void RaftPart::start(std::vector&& peers, bool asLearner) { committedLogTerm_ = logIdAndTerm.second; if (lastLogId_ < committedLogId_) { - LOG(INFO) << idStr_ << "Reset lastLogId " << lastLogId_ << " to be the committedLogId " - << committedLogId_; + VLOG(1) << idStr_ << "Reset lastLogId " << lastLogId_ << " to be the committedLogId " + << committedLogId_; lastLogId_ = committedLogId_; lastLogTerm_ = committedLogTerm_; wal_->reset(); } - LOG(INFO) << idStr_ << "There are " << peers.size() << " peer hosts, and total " - << peers.size() + 1 << " copies. The quorum is " << quorum_ + 1 << ", as learner " - << asLearner << ", lastLogId " << lastLogId_ << ", lastLogTerm " << lastLogTerm_ - << ", committedLogId " << committedLogId_ << ", committedLogTerm " << committedLogTerm_ - << ", term " << term_; + VLOG(1) << idStr_ << "There are " << peers.size() << " peer hosts, and total " << peers.size() + 1 + << " copies. 
The quorum is " << quorum_ + 1 << ", as learner " << asLearner + << ", lastLogId " << lastLogId_ << ", lastLogTerm " << lastLogTerm_ << ", committedLogId " + << committedLogId_ << ", committedLogTerm " << committedLogTerm_ << ", term " << term_; // Start all peer hosts for (auto& addr : peers) { - LOG(INFO) << idStr_ << "Add peer " << addr; + VLOG(1) << idStr_ << "Add peer " << addr; auto hostPtr = std::make_shared(addr, shared_from_this()); hosts_.emplace_back(hostPtr); } @@ -324,7 +323,7 @@ void RaftPart::start(std::vector&& peers, bool asLearner) { } void RaftPart::stop() { - VLOG(2) << idStr_ << "Stopping the partition"; + VLOG(1) << idStr_ << "Stopping the partition"; decltype(hosts_) hosts; { @@ -340,25 +339,25 @@ void RaftPart::stop() { h->stop(); } - VLOG(2) << idStr_ << "Invoked stop() on all peer hosts"; + VLOG(1) << idStr_ << "Invoked stop() on all peer hosts"; for (auto& h : hosts) { - VLOG(2) << idStr_ << "Waiting " << h->idStr() << " to stop"; + VLOG(1) << idStr_ << "Waiting " << h->idStr() << " to stop"; h->waitForStop(); - VLOG(2) << idStr_ << h->idStr() << "has stopped"; + VLOG(1) << idStr_ << h->idStr() << "has stopped"; } hosts.clear(); - LOG(INFO) << idStr_ << "Partition has been stopped"; + VLOG(1) << idStr_ << "Partition has been stopped"; } nebula::cpp2::ErrorCode RaftPart::canAppendLogs() { DCHECK(!raftLock_.try_lock()); if (UNLIKELY(status_ != Status::RUNNING)) { - LOG(ERROR) << idStr_ << "The partition is not running"; + VLOG(3) << idStr_ << "The partition is not running"; return nebula::cpp2::ErrorCode::E_RAFT_STOPPED; } if (UNLIKELY(role_ != Role::LEADER)) { - LOG_EVERY_N(WARNING, 1000) << idStr_ << "The partition is not a leader"; + VLOG_EVERY_N(2, 1000) << idStr_ << "The partition is not a leader"; return nebula::cpp2::ErrorCode::E_LEADER_CHANGED; } return nebula::cpp2::ErrorCode::SUCCEEDED; @@ -371,7 +370,7 @@ nebula::cpp2::ErrorCode RaftPart::canAppendLogs(TermID termId) { return rc; } if (UNLIKELY(term_ != termId)) { - VLOG(2) << 
idStr_ << "Term has been updated, origin " << termId << ", new " << term_; + VLOG(3) << idStr_ << "Term has been updated, origin " << termId << ", new " << term_; return nebula::cpp2::ErrorCode::E_RAFT_TERM_OUT_OF_DATE; } return nebula::cpp2::ErrorCode::SUCCEEDED; @@ -380,29 +379,29 @@ nebula::cpp2::ErrorCode RaftPart::canAppendLogs(TermID termId) { void RaftPart::addLearner(const HostAddr& addr) { CHECK(!raftLock_.try_lock()); if (addr == addr_) { - LOG(INFO) << idStr_ << "I am learner!"; + VLOG(1) << idStr_ << "I am learner!"; return; } auto it = std::find_if( hosts_.begin(), hosts_.end(), [&addr](const auto& h) { return h->address() == addr; }); if (it == hosts_.end()) { hosts_.emplace_back(std::make_shared(addr, shared_from_this(), true)); - LOG(INFO) << idStr_ << "Add learner " << addr; + VLOG(1) << idStr_ << "Add learner " << addr; } else { - LOG(INFO) << idStr_ << "The host " << addr << " has been existed as " - << ((*it)->isLearner() ? " learner " : " group member"); + VLOG(1) << idStr_ << "The host " << addr << " has been existed as " + << ((*it)->isLearner() ? 
" learner " : " group member"); } } void RaftPart::preProcessTransLeader(const HostAddr& target) { CHECK(!raftLock_.try_lock()); - LOG(INFO) << idStr_ << "Pre process transfer leader to " << target; + VLOG(1) << idStr_ << "Pre process transfer leader to " << target; switch (role_) { case Role::FOLLOWER: { if (target != addr_ && target != HostAddr("", 0)) { - LOG(INFO) << idStr_ << "I am follower, just wait for the new leader."; + VLOG(1) << idStr_ << "I am follower, just wait for the new leader."; } else { - LOG(INFO) << idStr_ << "I will be the new leader, trigger leader election now!"; + VLOG(1) << idStr_ << "I will be the new leader, trigger leader election now!"; bgWorkers_->addTask([self = shared_from_this()] { { std::lock_guard lck(self->raftLock_); @@ -416,8 +415,8 @@ void RaftPart::preProcessTransLeader(const HostAddr& target) { break; } default: { - LOG(INFO) << idStr_ << "My role is " << roleStr(role_) - << ", so do nothing when pre process transfer leader"; + VLOG(1) << idStr_ << "My role is " << roleStr(role_) + << ", so do nothing when pre process transfer leader"; break; } } @@ -425,7 +424,7 @@ void RaftPart::preProcessTransLeader(const HostAddr& target) { void RaftPart::commitTransLeader(const HostAddr& target) { bool needToUnlock = raftLock_.try_lock(); - LOG(INFO) << idStr_ << "Commit transfer leader to " << target; + VLOG(1) << idStr_ << "Commit transfer leader to " << target; switch (role_) { case Role::LEADER: { if (target != addr_ && !hosts_.empty()) { @@ -438,20 +437,20 @@ void RaftPart::commitTransLeader(const HostAddr& target) { for (auto& host : hosts_) { host->pause(); } - LOG(INFO) << idStr_ << "Give up my leadership!"; + VLOG(1) << idStr_ << "Give up my leadership!"; } } else { - LOG(INFO) << idStr_ << "I am already the leader!"; + VLOG(1) << idStr_ << "I am already the leader!"; } break; } case Role::FOLLOWER: case Role::CANDIDATE: { - LOG(INFO) << idStr_ << "I am " << roleStr(role_) << ", just wait for the new leader!"; + VLOG(1) << 
idStr_ << "I am " << roleStr(role_) << ", just wait for the new leader!"; break; } case Role::LEARNER: { - LOG(INFO) << idStr_ << "I am learner, not in the raft group, skip the log"; + VLOG(1) << idStr_ << "I am learner, not in the raft group, skip the log"; break; } } @@ -475,11 +474,11 @@ void RaftPart::addPeer(const HostAddr& peer) { CHECK(!raftLock_.try_lock()); if (peer == addr_) { if (role_ == Role::LEARNER) { - LOG(INFO) << idStr_ << "I am learner, promote myself to be follower"; + VLOG(1) << idStr_ << "I am learner, promote myself to be follower"; role_ = Role::FOLLOWER; updateQuorum(); } else { - LOG(INFO) << idStr_ << "I am already in the raft group!"; + VLOG(1) << idStr_ << "I am already in the raft group!"; } return; } @@ -488,14 +487,14 @@ void RaftPart::addPeer(const HostAddr& peer) { if (it == hosts_.end()) { hosts_.emplace_back(std::make_shared(peer, shared_from_this())); updateQuorum(); - LOG(INFO) << idStr_ << "Add peer " << peer; + VLOG(1) << idStr_ << "Add peer " << peer; } else { if ((*it)->isLearner()) { - LOG(INFO) << idStr_ << "The host " << peer << " has been existed as learner, promote it!"; + VLOG(1) << idStr_ << "The host " << peer << " has been existed as learner, promote it!"; (*it)->setLearner(false); updateQuorum(); } else { - LOG(INFO) << idStr_ << "The host " << peer << " has been existed as follower!"; + VLOG(1) << idStr_ << "The host " << peer << " has been existed as follower!"; } } } @@ -504,22 +503,22 @@ void RaftPart::removePeer(const HostAddr& peer) { CHECK(!raftLock_.try_lock()); if (peer == addr_) { // The part will be removed in REMOVE_PART_ON_SRC phase - LOG(INFO) << idStr_ << "Remove myself from the raft group."; + VLOG(1) << idStr_ << "Remove myself from the raft group."; return; } auto it = std::find_if( hosts_.begin(), hosts_.end(), [&peer](const auto& h) { return h->address() == peer; }); if (it == hosts_.end()) { - LOG(INFO) << idStr_ << "The peer " << peer << " not exist!"; + VLOG(1) << idStr_ << "The peer " << 
peer << " not exist!"; } else { if ((*it)->isLearner()) { - LOG(INFO) << idStr_ << "The peer is learner, remove it directly!"; + VLOG(1) << idStr_ << "The peer is learner, remove it directly!"; hosts_.erase(it); return; } hosts_.erase(it); updateQuorum(); - LOG(INFO) << idStr_ << "Remove peer " << peer; + VLOG(1) << idStr_ << "Remove peer " << peer; } } @@ -530,7 +529,7 @@ nebula::cpp2::ErrorCode RaftPart::checkPeer(const HostAddr& candidate) { return h->address() == candidate; }); if (it == hosts.end()) { - LOG(INFO) << idStr_ << "The candidate " << candidate << " is not in my peers"; + VLOG(2) << idStr_ << "The candidate " << candidate << " is not in my peers"; return nebula::cpp2::ErrorCode::E_RAFT_INVALID_PEER; } return nebula::cpp2::ErrorCode::SUCCEEDED; @@ -539,7 +538,7 @@ nebula::cpp2::ErrorCode RaftPart::checkPeer(const HostAddr& candidate) { void RaftPart::addListenerPeer(const HostAddr& listener) { std::lock_guard guard(raftLock_); if (listener == addr_) { - LOG(INFO) << idStr_ << "I am already in the raft group"; + VLOG(1) << idStr_ << "I am already in the raft group"; return; } auto it = std::find_if(hosts_.begin(), hosts_.end(), [&listener](const auto& h) { @@ -549,34 +548,34 @@ void RaftPart::addListenerPeer(const HostAddr& listener) { // Add listener as a raft learner hosts_.emplace_back(std::make_shared(listener, shared_from_this(), true)); listeners_.emplace(listener); - LOG(INFO) << idStr_ << "Add listener " << listener; + VLOG(1) << idStr_ << "Add listener " << listener; } else { - LOG(INFO) << idStr_ << "The listener " << listener << " has joined raft group before"; + VLOG(1) << idStr_ << "The listener " << listener << " has joined raft group before"; } } void RaftPart::removeListenerPeer(const HostAddr& listener) { std::lock_guard guard(raftLock_); if (listener == addr_) { - LOG(INFO) << idStr_ << "Remove myself from the raft group"; + VLOG(1) << idStr_ << "Remove myself from the raft group"; return; } auto it = std::find_if(hosts_.begin(), 
hosts_.end(), [&listener](const auto& h) { return h->address() == listener; }); if (it == hosts_.end()) { - LOG(INFO) << idStr_ << "The listener " << listener << " not found"; + VLOG(1) << idStr_ << "The listener " << listener << " not found"; } else { hosts_.erase(it); listeners_.erase(listener); - LOG(INFO) << idStr_ << "Remove listener " << listener; + VLOG(1) << idStr_ << "Remove listener " << listener; } } void RaftPart::preProcessRemovePeer(const HostAddr& peer) { CHECK(!raftLock_.try_lock()); if (role_ == Role::LEADER) { - LOG(INFO) << idStr_ << "I am leader, skip remove peer in preProcessLog"; + VLOG(1) << idStr_ << "I am leader, skip remove peer in preProcessLog"; return; } removePeer(peer); @@ -590,7 +589,7 @@ void RaftPart::commitRemovePeer(const HostAddr& peer) { } }; if (role_ == Role::FOLLOWER || role_ == Role::LEARNER) { - LOG(INFO) << idStr_ << "I am " << roleStr(role_) << ", skip remove peer in commit"; + VLOG(1) << idStr_ << "I am " << roleStr(role_) << ", skip remove peer in commit"; return; } CHECK(Role::LEADER == role_); @@ -627,28 +626,25 @@ folly::Future RaftPart::appendLogAsync(ClusterID source auto retFuture = folly::Future::makeEmpty(); if (bufferOverFlow_) { - LOG_EVERY_N(WARNING, 100) << idStr_ - << "The appendLog buffer is full." - " Please slow down the log appending rate." - << "replicatingLogs_ :" << replicatingLogs_; + VLOG_EVERY_N(2, 1000) + << idStr_ << "The appendLog buffer is full. Please slow down the log appending rate." + << "replicatingLogs_ :" << replicatingLogs_; return nebula::cpp2::ErrorCode::E_RAFT_BUFFER_OVERFLOW; } { std::lock_guard lck(logsLock_); - VLOG(2) << idStr_ << "Checking whether buffer overflow"; + VLOG(4) << idStr_ << "Checking whether buffer overflow"; if (logs_.size() >= FLAGS_max_batch_size) { // Buffer is full - LOG(WARNING) << idStr_ - << "The appendLog buffer is full." - " Please slow down the log appending rate." 
- << "replicatingLogs_ :" << replicatingLogs_; + VLOG(2) << idStr_ << "The appendLog buffer is full. Please slow down the log appending rate." + << "replicatingLogs_ :" << replicatingLogs_; bufferOverFlow_ = true; return nebula::cpp2::ErrorCode::E_RAFT_BUFFER_OVERFLOW; } - VLOG(2) << idStr_ << "Appending logs to the buffer"; + VLOG(4) << idStr_ << "Appending logs to the buffer"; // Append new logs to the buffer DCHECK_GE(source, 0); @@ -668,13 +664,13 @@ folly::Future RaftPart::appendLogAsync(ClusterID source bool expected = false; if (replicatingLogs_.compare_exchange_strong(expected, true)) { // We need to send logs to all followers - VLOG(2) << idStr_ << "Preparing to send AppendLog request"; + VLOG(4) << idStr_ << "Preparing to send AppendLog request"; sendingPromise_ = std::move(cachingPromise_); cachingPromise_.reset(); std::swap(swappedOutLogs, logs_); bufferOverFlow_ = false; } else { - VLOG(2) << idStr_ << "Another AppendLogs request is ongoing, just return"; + VLOG(4) << idStr_ << "Another AppendLogs request is ongoing, just return"; return retFuture; } } @@ -693,14 +689,14 @@ folly::Future RaftPart::appendLogAsync(ClusterID source if (!checkAppendLogResult(res)) { // Mosy likely failed because the partition is not leader - LOG_EVERY_N(WARNING, 1000) << idStr_ << "Cannot append logs, clean the buffer"; + VLOG_EVERY_N(2, 1000) << idStr_ << "Cannot append logs, clean the buffer"; return res; } // Replicate buffered logs to all followers // Replication will happen on a separate thread and will block // until majority accept the logs, the leadership changes, or // the partition stops - VLOG(2) << idStr_ << "Calling appendLogsInternal()"; + VLOG(4) << idStr_ << "Calling appendLogsInternal()"; AppendLogsIterator it( firstId, termId, @@ -726,10 +722,10 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) { LogID committed = 0; LogID lastId = 0; if (iter.valid()) { - VLOG(2) << idStr_ << "Ready to append logs from id " << iter.logId() << " 
(Current term is " + VLOG(4) << idStr_ << "Ready to append logs from id " << iter.logId() << " (Current term is " << currTerm << ")"; } else { - LOG(ERROR) << idStr_ << "Only happened when Atomic op failed"; + VLOG(4) << idStr_ << "Only happened when Atomic op failed"; replicatingLogs_ = false; return; } @@ -747,7 +743,7 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) { // Step 1: Write WAL SlowOpTracker tracker; if (!wal_->appendLogs(iter)) { - LOG_EVERY_N(WARNING, 100) << idStr_ << "Failed to write into WAL"; + VLOG_EVERY_N(2, 1000) << idStr_ << "Failed to write into WAL"; res = nebula::cpp2::ErrorCode::E_RAFT_WAL_FAIL; lastLogId_ = wal_->lastLogId(); lastLogTerm_ = wal_->lastLogTerm(); @@ -757,12 +753,11 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) { if (tracker.slow()) { tracker.output(idStr_, folly::stringPrintf("Write WAL, total %ld", lastId - prevLogId + 1)); } - VLOG(2) << idStr_ << "Succeeded writing logs [" << iter.firstLogId() << ", " << lastId + VLOG(4) << idStr_ << "Succeeded writing logs [" << iter.firstLogId() << ", " << lastId << "] to WAL"; } while (false); if (!checkAppendLogResult(res)) { - LOG_EVERY_N(WARNING, 100) << idStr_ << "Failed to write wal"; return; } // Step 2: Replicate to followers @@ -794,12 +789,12 @@ void RaftPart::replicateLogs(folly::EventBase* eb, } while (false); if (!checkAppendLogResult(res)) { - LOG(WARNING) << idStr_ << "replicateLogs failed because of not leader or term changed"; + VLOG(3) << idStr_ << "replicateLogs failed because of not leader or term changed"; return; } - LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "About to replicate logs in range [" - << iter.firstLogId() << ", " << lastLogId << "] to all peer hosts"; + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "About to replicate logs in range [" + << iter.firstLogId() << ", " << lastLogId << "] to all peer hosts"; lastMsgSentDur_.reset(); SlowOpTracker tracker; @@ -811,7 +806,7 @@ void 
RaftPart::replicateLogs(folly::EventBase* eb, prevLogId, prevLogTerm, committedId](std::shared_ptr hostPtr) { - VLOG(2) << self->idStr_ << "Appending logs to " << hostPtr->idStr(); + VLOG(4) << self->idStr_ << "Appending logs to " << hostPtr->idStr(); return via(eb, [=]() -> Future { return hostPtr->appendLogs( eb, currTerm, lastLogId, committedId, prevLogTerm, prevLogId); @@ -836,7 +831,7 @@ void RaftPart::replicateLogs(folly::EventBase* eb, prevLogTerm, pHosts = std::move(hosts), tracker](folly::Try&& result) mutable { - VLOG(2) << self->idStr_ << "Received enough response"; + VLOG(4) << self->idStr_ << "Received enough response"; CHECK(!result.hasException()); if (tracker.slow()) { tracker.output(self->idStr_, @@ -894,7 +889,7 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, if (numSucceeded >= quorum_) { // Majority have succeeded - VLOG(2) << idStr_ << numSucceeded << " hosts have accepted the logs"; + VLOG(4) << idStr_ << numSucceeded << " hosts have accepted the logs"; LogID firstLogId = 0; do { @@ -910,9 +905,9 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, } while (false); if (!checkAppendLogResult(res)) { - LOG(WARNING) << idStr_ - << "processAppendLogResponses failed because of not leader " - "or term changed"; + VLOG(3) << idStr_ + << "processAppendLogResponses failed because of not leader " + "or term changed"; return; } @@ -942,7 +937,7 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, tracker.output(idStr_, folly::stringPrintf("Total commit: %ld", committedLogId_ - committedId)); } - VLOG(2) << idStr_ << "Leader succeeded in committing the logs " << committedId + 1 << " to " + VLOG(4) << idStr_ << "Leader succeeded in committing the logs " << committedId + 1 << " to " << lastLogId; } @@ -963,7 +958,7 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, // If no more valid logs to be replicated in iter, create a new one if we // have new log if 
(iter.empty()) { - VLOG(2) << idStr_ << "logs size " << logs_.size(); + VLOG(4) << idStr_ << "logs size " << logs_.size(); if (logs_.size() > 0) { // continue to replicate the logs sendingPromise_ = std::move(cachingPromise_); @@ -990,7 +985,7 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, // and all of them failed, which would make iter is empty again if (iter.empty()) { replicatingLogs_ = false; - VLOG(2) << idStr_ << "No more log to be replicated"; + VLOG(4) << idStr_ << "No more log to be replicated"; return; } } @@ -998,8 +993,8 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, this->appendLogsInternal(std::move(iter), currTerm); } else { // Not enough hosts accepted the log, re-try - LOG_EVERY_N(WARNING, 100) << idStr_ << "Only " << numSucceeded - << " hosts succeeded, Need to try again"; + VLOG_EVERY_N(2, 1000) << idStr_ << "Only " << numSucceeded + << " hosts succeeded, Need to try again"; usleep(1000); replicateLogs(eb, std::move(iter), currTerm, lastLogId, committedId, prevLogTerm, prevLogId); } @@ -1015,8 +1010,8 @@ bool RaftPart::needToStartElection() { if (status_ == Status::RUNNING && role_ == Role::FOLLOWER && (lastMsgRecvDur_.elapsedInMSec() >= FLAGS_raft_heartbeat_interval_secs * 1000 || isBlindFollower_)) { - LOG(INFO) << idStr_ << "Start leader election, reason: lastMsgDur " - << lastMsgRecvDur_.elapsedInMSec() << ", term " << term_; + VLOG(1) << idStr_ << "Start leader election, reason: lastMsgDur " + << lastMsgRecvDur_.elapsedInMSec() << ", term " << term_; role_ = Role::CANDIDATE; leader_ = HostAddr("", 0); } @@ -1031,13 +1026,13 @@ bool RaftPart::prepareElectionRequest(cpp2::AskForVoteRequest& req, // Make sure the partition is running if (status_ != Status::RUNNING) { - VLOG(2) << idStr_ << "The partition is not running"; + VLOG(3) << idStr_ << "The partition is not running"; return false; } // Make sure the role is still CANDIDATE if (role_ != Role::CANDIDATE) { - VLOG(2) << idStr_ << 
"A leader has been elected"; + VLOG(3) << idStr_ << "A leader has been elected"; return false; } @@ -1083,23 +1078,23 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul std::lock_guard g(raftLock_); if (UNLIKELY(status_ == Status::STOPPED)) { - LOG(INFO) << idStr_ << "The part has been stopped, skip the request"; + VLOG(3) << idStr_ << "The part has been stopped, skip the request"; return false; } if (UNLIKELY(status_ == Status::STARTING)) { - LOG(INFO) << idStr_ << "The partition is still starting"; + VLOG(3) << idStr_ << "The partition is still starting"; return false; } if (UNLIKELY(status_ == Status::WAITING_SNAPSHOT)) { - LOG(INFO) << idStr_ << "The partition is still waiting snapshot"; + VLOG(3) << idStr_ << "The partition is still waiting snapshot"; return false; } if (role_ != Role::CANDIDATE) { - LOG(INFO) << idStr_ << "Partition's role has changed to " << roleStr(role_) - << " during the election, so discard the results"; + VLOG(3) << idStr_ << "Partition's role has changed to " << roleStr(role_) + << " during the election, so discard the results"; return false; } @@ -1107,9 +1102,9 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul // term changed during actual leader election if (!isPreVote && proposedTerm != term_) { - LOG(INFO) << idStr_ << "Partition's term has changed during election, " - << "so just ignore the respsonses, " - << "expected " << proposedTerm << ", actual " << term_; + VLOG(2) << idStr_ << "Partition's term has changed during election, " + << "so just ignore the respsonses, " + << "expected " << proposedTerm << ", actual " << term_; return false; } @@ -1119,10 +1114,9 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul if (r.second.get_error_code() == nebula::cpp2::ErrorCode::SUCCEEDED) { ++numSucceeded; } else { - LOG(WARNING) << idStr_ << "Receive response about askForVote from " - << hosts[r.first]->address() << ", error code is " - << 
apache::thrift::util::enumNameSafe(r.second.get_error_code()) - << ", isPreVote = " << isPreVote; + VLOG(2) << idStr_ << "Receive response about askForVote from " << hosts[r.first]->address() + << ", error code is " << apache::thrift::util::enumNameSafe(r.second.get_error_code()) + << ", isPreVote = " << isPreVote; } highestTerm = std::max(highestTerm, r.second.get_current_term()); } @@ -1136,9 +1130,9 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul if (numSucceeded >= quorum_) { if (isPreVote) { - LOG(INFO) << idStr_ << "Partition win prevote of term " << proposedTerm; + VLOG(1) << idStr_ << "Partition win prevote of term " << proposedTerm; } else { - LOG(INFO) << idStr_ << "Partition is elected as the new leader for term " << proposedTerm; + VLOG(1) << idStr_ << "Partition is elected as the new leader for term " << proposedTerm; term_ = proposedTerm; role_ = Role::LEADER; leader_ = addr_; @@ -1147,13 +1141,13 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul return true; } - LOG(INFO) << idStr_ << "Did not get enough votes from election of term " << proposedTerm - << ", isPreVote = " << isPreVote; + VLOG(1) << idStr_ << "Did not get enough votes from election of term " << proposedTerm + << ", isPreVote = " << isPreVote; return false; } folly::Future RaftPart::leaderElection(bool isPreVote) { - VLOG(2) << idStr_ << "Start leader election..."; + VLOG(1) << idStr_ << "Start leader election..."; using namespace folly; // NOLINT since the fancy overload of | operator bool expected = false; @@ -1181,13 +1175,13 @@ folly::Future RaftPart::leaderElection(bool isPreVote) { } // Send out the AskForVoteRequest - LOG(INFO) << idStr_ << "Sending out an election request " - << "(space = " << voteReq.get_space() << ", part = " << voteReq.get_part() - << ", term = " << voteReq.get_term() << ", lastLogId = " << voteReq.get_last_log_id() - << ", lastLogTerm = " << voteReq.get_last_log_term() - << ", candidateIP 
= " << voteReq.get_candidate_addr() - << ", candidatePort = " << voteReq.get_candidate_port() << ")" - << ", isPreVote = " << isPreVote; + VLOG(1) << idStr_ << "Sending out an election request " + << "(space = " << voteReq.get_space() << ", part = " << voteReq.get_part() + << ", term = " << voteReq.get_term() << ", lastLogId = " << voteReq.get_last_log_id() + << ", lastLogTerm = " << voteReq.get_last_log_term() + << ", candidateIP = " << voteReq.get_candidate_addr() + << ", candidatePort = " << voteReq.get_candidate_port() << ")" + << ", isPreVote = " << isPreVote; auto proposedTerm = voteReq.get_term(); auto resps = ElectionResponses(); @@ -1202,7 +1196,7 @@ folly::Future RaftPart::leaderElection(bool isPreVote) { collectNSucceeded( gen::from(hosts) | gen::map([eb, self = shared_from_this(), voteReq](std::shared_ptr host) { - VLOG(2) << self->idStr_ << "Sending AskForVoteRequest to " << host->idStr(); + VLOG(4) << self->idStr_ << "Sending AskForVoteRequest to " << host->idStr(); return via(eb, [voteReq, host, eb]() -> Future { return host->askForVote(voteReq, eb); }); @@ -1218,7 +1212,7 @@ folly::Future RaftPart::leaderElection(bool isPreVote) { .via(executor_.get()) .then([self = shared_from_this(), pro = std::move(promise), hosts, proposedTerm, isPreVote]( auto&& t) mutable { - VLOG(2) << self->idStr_ + VLOG(4) << self->idStr_ << "AskForVoteRequest has been sent to all peers, waiting for responses"; CHECK(!t.hasException()); pro.setValue( @@ -1275,11 +1269,11 @@ void RaftPart::statusPolling(int64_t startTime) { } else { // No leader has been elected, need to continue // (After sleeping a random period between [500ms, 2s]) - VLOG(2) << idStr_ << "Wait for a while and continue the leader election"; + VLOG(4) << idStr_ << "Wait for a while and continue the leader election"; delay = (folly::Random::rand32(1500) + 500); } } else if (needToSendHeartbeat()) { - VLOG(2) << idStr_ << "Need to send heartbeat"; + VLOG(4) << idStr_ << "Need to send heartbeat"; 
sendHeartbeat(); } if (needToCleanupSnapshot()) { @@ -1288,7 +1282,7 @@ void RaftPart::statusPolling(int64_t startTime) { { std::lock_guard g(raftLock_); if (status_ == Status::RUNNING || status_ == Status::WAITING_SNAPSHOT) { - VLOG(3) << idStr_ << "Schedule new task"; + VLOG(4) << idStr_ << "Schedule new task"; bgWorkers_->addDelayTask( delay, [self = shared_from_this(), startTime] { self->statusPolling(startTime); }); } @@ -1302,7 +1296,7 @@ bool RaftPart::needToCleanupSnapshot() { } void RaftPart::cleanupSnapshot() { - LOG(INFO) << idStr_ << "Clean up the snapshot"; + VLOG(1) << idStr_ << "Snapshot has not been received for a long time, clean up the snapshot"; std::lock_guard g(raftLock_); reset(); status_ = Status::RUNNING; @@ -1323,38 +1317,38 @@ bool RaftPart::needToCleanWal() { void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, cpp2::AskForVoteResponse& resp) { - LOG(INFO) << idStr_ << "Received a VOTING request" - << ": space = " << req.get_space() << ", partition = " << req.get_part() - << ", candidateAddr = " << req.get_candidate_addr() << ":" << req.get_candidate_port() - << ", term = " << req.get_term() << ", lastLogId = " << req.get_last_log_id() - << ", lastLogTerm = " << req.get_last_log_term() - << ", isPreVote = " << req.get_is_pre_vote(); + VLOG(1) << idStr_ << "Received a VOTING request" + << ": space = " << req.get_space() << ", partition = " << req.get_part() + << ", candidateAddr = " << req.get_candidate_addr() << ":" << req.get_candidate_port() + << ", term = " << req.get_term() << ", lastLogId = " << req.get_last_log_id() + << ", lastLogTerm = " << req.get_last_log_term() + << ", isPreVote = " << req.get_is_pre_vote(); std::lock_guard g(raftLock_); resp.current_term_ref() = term_; // Make sure the partition is running if (UNLIKELY(status_ == Status::STOPPED)) { - LOG(INFO) << idStr_ << "The part has been stopped, skip the request"; + VLOG(3) << idStr_ << "The part has been stopped, skip the request"; 
resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_STOPPED; return; } if (UNLIKELY(status_ == Status::STARTING)) { - LOG(INFO) << idStr_ << "The partition is still starting"; + VLOG(3) << idStr_ << "The partition is still starting"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_NOT_READY; return; } if (UNLIKELY(status_ == Status::WAITING_SNAPSHOT)) { - LOG(INFO) << idStr_ << "The partition is still waiting snapshot"; + VLOG(3) << idStr_ << "The partition is still waiting snapshot"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_WAITING_SNAPSHOT; return; } - LOG(INFO) << idStr_ << "The partition currently is a " << roleStr(role_) << ", lastLogId " - << lastLogId_ << ", lastLogTerm " << lastLogTerm_ << ", committedLogId " - << committedLogId_ << ", term " << term_; + VLOG(1) << idStr_ << "The partition currently is a " << roleStr(role_) << ", lastLogId " + << lastLogId_ << ", lastLogTerm " << lastLogTerm_ << ", committedLogId " + << committedLogId_ << ", term " << term_; if (role_ == Role::LEARNER) { resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_BAD_ROLE; return; @@ -1369,9 +1363,9 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, // Check term id if (req.get_term() < term_) { - LOG(INFO) << idStr_ << "The partition currently is on term " << term_ - << ", the term proposed by the candidate is " << req.get_term() - << ", so it will be rejected"; + VLOG(1) << idStr_ << "The partition currently is on term " << term_ + << ", the term proposed by the candidate is " << req.get_term() + << ", so it will be rejected"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_TERM_OUT_OF_DATE; return; } @@ -1395,9 +1389,9 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, // Check the last term to receive a log if (req.get_last_log_term() < lastLogTerm_) { - LOG(INFO) << idStr_ << "The partition's last term to receive a log is " << lastLogTerm_ - << ", which is newer than the candidate's 
log " << req.get_last_log_term() - << ". So the candidate will be rejected"; + VLOG(1) << idStr_ << "The partition's last term to receive a log is " << lastLogTerm_ + << ", which is newer than the candidate's log " << req.get_last_log_term() + << ". So the candidate will be rejected"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_TERM_OUT_OF_DATE; return; } @@ -1405,9 +1399,9 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, if (req.get_last_log_term() == lastLogTerm_) { // Check last log id if (req.get_last_log_id() < lastLogId_) { - LOG(INFO) << idStr_ << "The partition's last log id is " << lastLogId_ - << ". The candidate's last log id " << req.get_last_log_id() - << " is smaller, so it will be rejected, candidate is " << candidate; + VLOG(1) << idStr_ << "The partition's last log id is " << lastLogId_ + << ". The candidate's last log id " << req.get_last_log_id() + << " is smaller, so it will be rejected, candidate is " << candidate; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_LOG_STALE; return; } @@ -1423,16 +1417,16 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, * majority votes: the candidate will be leader */ if (votedTerm_ == req.get_term() && votedAddr_ != candidate) { - LOG(INFO) << idStr_ << "We have voted " << votedAddr_ << " on term " << votedTerm_ - << ", so we should reject the candidate " << candidate << " request on term " - << req.get_term(); + VLOG(1) << idStr_ << "We have voted " << votedAddr_ << " on term " << votedTerm_ + << ", so we should reject the candidate " << candidate << " request on term " + << req.get_term(); resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_TERM_OUT_OF_DATE; return; } // Ok, no reason to refuse, we will vote for the candidate - LOG(INFO) << idStr_ << "The partition will vote for the candidate " << candidate - << ", isPreVote = " << req.get_is_pre_vote(); + VLOG(1) << idStr_ << "The partition will vote for the candidate " << 
candidate + << ", isPreVote = " << req.get_is_pre_vote(); if (req.get_is_pre_vote()) { // return succeed if it is prevote, do not change any state @@ -1444,8 +1438,8 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, // role_ and term_ has been set above if (oldRole == Role::LEADER) { if (wal_->lastLogId() > lastLogId_) { - LOG(INFO) << idStr_ << "There are some logs up to " << wal_->lastLogId() - << " update lastLogId_ " << lastLogId_ << " to wal's"; + VLOG(2) << idStr_ << "There are some logs up to " << wal_->lastLogId() + << " update lastLogId_ " << lastLogId_ << " to wal's"; lastLogId_ = wal_->lastLogId(); lastLogTerm_ = wal_->lastLogTerm(); } @@ -1474,21 +1468,21 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, cpp2::AppendLogResponse& resp) { - LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Received logAppend" - << ": GraphSpaceId = " << req.get_space() - << ", partition = " << req.get_part() - << ", leaderIp = " << req.get_leader_addr() - << ", leaderPort = " << req.get_leader_port() - << ", current_term = " << req.get_current_term() - << ", committedLogId = " << req.get_committed_log_id() - << ", lastLogIdSent = " << req.get_last_log_id_sent() - << ", lastLogTermSent = " << req.get_last_log_term_sent() - << ", num_logs = " << req.get_log_str_list().size() - << ", local lastLogId = " << lastLogId_ - << ", local lastLogTerm = " << lastLogTerm_ - << ", local committedLogId = " << committedLogId_ - << ", local current term = " << term_ - << ", wal lastLogId = " << wal_->lastLogId(); + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "Received logAppend" + << ": GraphSpaceId = " << req.get_space() + << ", partition = " << req.get_part() + << ", leaderIp = " << req.get_leader_addr() + << ", leaderPort = " << req.get_leader_port() + << ", current_term = " << req.get_current_term() + << ", committedLogId = " << req.get_committed_log_id() + << 
", lastLogIdSent = " << req.get_last_log_id_sent() + << ", lastLogTermSent = " << req.get_last_log_term_sent() + << ", num_logs = " << req.get_log_str_list().size() + << ", local lastLogId = " << lastLogId_ + << ", local lastLogTerm = " << lastLogTerm_ + << ", local committedLogId = " << committedLogId_ + << ", local current term = " << term_ + << ", wal lastLogId = " << wal_->lastLogId(); std::lock_guard g(raftLock_); resp.current_term_ref() = term_; @@ -1501,17 +1495,17 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, // Check status if (UNLIKELY(status_ == Status::STOPPED)) { - VLOG(2) << idStr_ << "The part has been stopped, skip the request"; + VLOG(3) << idStr_ << "The part has been stopped, skip the request"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_STOPPED; return; } if (UNLIKELY(status_ == Status::STARTING)) { - VLOG(2) << idStr_ << "The partition is still starting"; + VLOG(3) << idStr_ << "The partition is still starting"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_NOT_READY; return; } if (UNLIKELY(status_ == Status::WAITING_SNAPSHOT)) { - VLOG(2) << idStr_ << "The partition is waiting for snapshot"; + VLOG(3) << idStr_ << "The partition is waiting for snapshot"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_WAITING_SNAPSHOT; return; } @@ -1521,7 +1515,7 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, resp.current_term_ref() = term_; if (err != nebula::cpp2::ErrorCode::SUCCEEDED) { // Wrong leadership - VLOG(2) << idStr_ << "Will not follow the leader"; + VLOG(3) << idStr_ << "Will not follow the leader"; resp.error_code_ref() = err; return; } @@ -1613,7 +1607,7 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, auto [code, lastCommitId, lastCommitTerm] = commitLogs(std::move(walIt), false); if (code == nebula::cpp2::ErrorCode::SUCCEEDED) { stats::StatsManager::addValue(kCommitLogLatencyUs, execTime_); - VLOG(1) << idStr_ << "Follower 
succeeded committing log " << committedLogId_ + 1 << " to " + VLOG(4) << idStr_ << "Follower succeeded committing log " << committedLogId_ + 1 << " to " << lastLogIdCanCommit; CHECK_EQ(lastLogIdCanCommit, lastCommitId); committedLogId_ = lastCommitId; @@ -1621,7 +1615,7 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, resp.committed_log_id_ref() = lastLogIdCanCommit; resp.error_code_ref() = nebula::cpp2::ErrorCode::SUCCEEDED; } else if (code == nebula::cpp2::ErrorCode::E_WRITE_STALLED) { - VLOG(1) << idStr_ << "Follower delay committing log " << committedLogId_ + 1 << " to " + VLOG(4) << idStr_ << "Follower delay committing log " << committedLogId_ + 1 << " to " << lastLogIdCanCommit; // Even if log is not applied to state machine, still regard as succeeded: // 1. As a follower, upcoming request will try to commit them @@ -1629,8 +1623,8 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, resp.committed_log_id_ref() = committedLogId_; resp.error_code_ref() = nebula::cpp2::ErrorCode::SUCCEEDED; } else { - LOG(ERROR) << idStr_ << "Failed to commit log " << committedLogId_ + 1 << " to " - << req.get_committed_log_id(); + VLOG(3) << idStr_ << "Failed to commit log " << committedLogId_ + 1 << " to " + << req.get_committed_log_id(); resp.committed_log_id_ref() = committedLogId_; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_WAL_FAIL; } @@ -1652,11 +1646,11 @@ nebula::cpp2::ErrorCode RaftPart::verifyLeader(const REQ& req) { return code; } - VLOG(2) << idStr_ << "The current role is " << roleStr(role_); + VLOG(4) << idStr_ << "The current role is " << roleStr(role_); // Make sure the remote term is greater than local's if (req.get_current_term() < term_) { - LOG_EVERY_N(INFO, 100) << idStr_ << "The current role is " << roleStr(role_) - << ". The local term is " << term_ << ". The remote term is not newer"; + VLOG(3) << idStr_ << "The current role is " << roleStr(role_) << ". The local term is " << term_ + << ". 
The remote term is not newer"; return nebula::cpp2::ErrorCode::E_RAFT_TERM_OUT_OF_DATE; } else if (req.get_current_term() > term_) { // found new leader with higher term @@ -1680,8 +1674,8 @@ nebula::cpp2::ErrorCode RaftPart::verifyLeader(const REQ& req) { Role oldRole = role_; TermID oldTerm = term_; // Ok, no reason to refuse, just follow the leader - LOG(INFO) << idStr_ << "The current role is " << roleStr(role_) << ". Will follow the new leader " - << peer << " on term " << req.get_current_term(); + VLOG(1) << idStr_ << "The current role is " << roleStr(role_) << ". Will follow the new leader " + << peer << " on term " << req.get_current_term(); if (role_ != Role::LEARNER) { role_ = Role::FOLLOWER; @@ -1692,8 +1686,8 @@ nebula::cpp2::ErrorCode RaftPart::verifyLeader(const REQ& req) { // Before accept the logs from the new leader, check the logs locally. if (oldRole == Role::LEADER) { if (wal_->lastLogId() > lastLogId_) { - LOG(INFO) << idStr_ << "There are some logs up to " << wal_->lastLogId() - << " update lastLogId_ " << lastLogId_ << " to wal's"; + VLOG(2) << idStr_ << "There are some logs up to " << wal_->lastLogId() + << " update lastLogId_ " << lastLogId_ << " to wal's"; lastLogId_ = wal_->lastLogId(); lastLogTerm_ = wal_->lastLogTerm(); } @@ -1711,19 +1705,19 @@ nebula::cpp2::ErrorCode RaftPart::verifyLeader(const REQ& req) { void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, cpp2::HeartbeatResponse& resp) { - LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Received heartbeat" - << ": GraphSpaceId = " << req.get_space() - << ", partition = " << req.get_part() - << ", leaderIp = " << req.get_leader_addr() - << ", leaderPort = " << req.get_leader_port() - << ", current_term = " << req.get_current_term() - << ", committedLogId = " << req.get_committed_log_id() - << ", lastLogIdSent = " << req.get_last_log_id_sent() - << ", lastLogTermSent = " << req.get_last_log_term_sent() - << ", local lastLogId = " << lastLogId_ - << ", local 
lastLogTerm = " << lastLogTerm_ - << ", local committedLogId = " << committedLogId_ - << ", local current term = " << term_; + VLOG_IF(1, FLAGS_trace_raft) << idStr_ << "Received heartbeat" + << ": GraphSpaceId = " << req.get_space() + << ", partition = " << req.get_part() + << ", leaderIp = " << req.get_leader_addr() + << ", leaderPort = " << req.get_leader_port() + << ", current_term = " << req.get_current_term() + << ", committedLogId = " << req.get_committed_log_id() + << ", lastLogIdSent = " << req.get_last_log_id_sent() + << ", lastLogTermSent = " << req.get_last_log_term_sent() + << ", local lastLogId = " << lastLogId_ + << ", local lastLogTerm = " << lastLogTerm_ + << ", local committedLogId = " << committedLogId_ + << ", local current term = " << term_; std::lock_guard g(raftLock_); // As for heartbeat, last_log_id and last_log_term is not checked by leader, follower only verify @@ -1740,12 +1734,12 @@ void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, // Check status if (UNLIKELY(status_ == Status::STOPPED)) { - VLOG(2) << idStr_ << "The part has been stopped, skip the request"; + VLOG(3) << idStr_ << "The part has been stopped, skip the request"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_STOPPED; return; } if (UNLIKELY(status_ == Status::STARTING)) { - VLOG(2) << idStr_ << "The partition is still starting"; + VLOG(3) << idStr_ << "The partition is still starting"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_NOT_READY; return; } @@ -1755,7 +1749,7 @@ void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, resp.current_term_ref() = term_; if (err != nebula::cpp2::ErrorCode::SUCCEEDED) { // Wrong leadership - VLOG(2) << idStr_ << "Will not follow the leader"; + VLOG(3) << idStr_ << "Will not follow the leader"; resp.error_code_ref() = err; return; } @@ -1770,35 +1764,35 @@ void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, void 
RaftPart::processSendSnapshotRequest(const cpp2::SendSnapshotRequest& req, cpp2::SendSnapshotResponse& resp) { - VLOG(1) << idStr_ << "Receive snapshot, total rows " << req.get_rows().size() + VLOG(2) << idStr_ << "Receive snapshot, total rows " << req.get_rows().size() << ", total count received " << req.get_total_count() << ", total size received " << req.get_total_size() << ", finished " << req.get_done(); std::lock_guard g(raftLock_); // Check status if (UNLIKELY(status_ == Status::STOPPED)) { - LOG(ERROR) << idStr_ << "The part has been stopped, skip the request"; + VLOG(3) << idStr_ << "The part has been stopped, skip the request"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_STOPPED; return; } if (UNLIKELY(status_ == Status::STARTING)) { - LOG(ERROR) << idStr_ << "The partition is still starting"; + VLOG(3) << idStr_ << "The partition is still starting"; resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_NOT_READY; return; } if (UNLIKELY(role_ != Role::FOLLOWER && role_ != Role::LEARNER)) { - LOG(ERROR) << idStr_ << "Bad role " << roleStr(role_); + VLOG(3) << idStr_ << "Bad role " << roleStr(role_); resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_STOPPED; return; } if (UNLIKELY(leader_ != HostAddr(req.get_leader_addr(), req.get_leader_port()) || term_ != req.get_term())) { - LOG(ERROR) << idStr_ << "Term out of date, current term " << term_ << ", received term " - << req.get_term(); + VLOG(2) << idStr_ << "Term out of date, current term " << term_ << ", received term " + << req.get_term(); resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_TERM_OUT_OF_DATE; return; } if (status_ != Status::WAITING_SNAPSHOT) { - LOG(INFO) << idStr_ << "Begin to receive the snapshot"; + VLOG(2) << idStr_ << "Begin to receive the snapshot"; reset(); status_ = Status::WAITING_SNAPSHOT; } @@ -1810,9 +1804,9 @@ void RaftPart::processSendSnapshotRequest(const cpp2::SendSnapshotRequest& req, lastTotalCount_ += ret.first; lastTotalSize_ += ret.second; if 
(lastTotalCount_ != req.get_total_count() || lastTotalSize_ != req.get_total_size()) { - LOG(ERROR) << idStr_ << "Bad snapshot, total rows received " << lastTotalCount_ - << ", total rows sended " << req.get_total_count() << ", total size received " - << lastTotalSize_ << ", total size sended " << req.get_total_size(); + VLOG(2) << idStr_ << "Bad snapshot, total rows received " << lastTotalCount_ + << ", total rows sended " << req.get_total_count() << ", total size received " + << lastTotalSize_ << ", total size sended " << req.get_total_size(); resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_PERSIST_SNAPSHOT_FAILED; return; } @@ -1826,9 +1820,9 @@ void RaftPart::processSendSnapshotRequest(const cpp2::SendSnapshotRequest& req, DCHECK_EQ(wal_->firstLogId(), 0); DCHECK_EQ(wal_->lastLogId(), 0); status_ = Status::RUNNING; - LOG(INFO) << idStr_ << "Receive all snapshot, committedLogId_ " << committedLogId_ - << ", committedLogTerm_ " << committedLogTerm_ << ", lastLodId " << lastLogId_ - << ", lastLogTermId " << lastLogTerm_; + VLOG(1) << idStr_ << "Receive all snapshot, committedLogId_ " << committedLogId_ + << ", committedLogTerm_ " << committedLogTerm_ << ", lastLodId " << lastLogId_ + << ", lastLogTermId " << lastLogTerm_; } resp.error_code_ref() = nebula::cpp2::ErrorCode::SUCCEEDED; return; @@ -1867,7 +1861,7 @@ void RaftPart::sendHeartbeat() { gen::from(hosts) | gen::map([self = shared_from_this(), eb, currTerm, commitLogId, prevLogId, prevLogTerm]( std::shared_ptr hostPtr) { - VLOG(2) << self->idStr_ << "Send heartbeat to " << hostPtr->idStr(); + VLOG(4) << self->idStr_ << "Send heartbeat to " << hostPtr->idStr(); return via(eb, [=]() -> Future { return hostPtr->sendHeartbeat(eb, currTerm, commitLogId, prevLogTerm, prevLogId); }); @@ -1902,7 +1896,7 @@ void RaftPart::sendHeartbeat() { } } if (numSucceeded >= replica) { - VLOG(2) << idStr_ << "Heartbeat is accepted by quorum"; + VLOG(4) << idStr_ << "Heartbeat is accepted by quorum"; std::lock_guard 
g(raftLock_); auto now = time::WallClock::fastNowInMilliSec(); lastMsgAcceptedCostMs_ = now - startMs; @@ -1968,21 +1962,21 @@ void RaftPart::reset() { nebula::cpp2::ErrorCode RaftPart::isCatchedUp(const HostAddr& peer) { std::lock_guard lck(raftLock_); - LOG(INFO) << idStr_ << "Check whether I catch up"; + VLOG(2) << idStr_ << "Check whether I catch up"; if (role_ != Role::LEADER) { - LOG(INFO) << idStr_ << "I am not the leader"; + VLOG(2) << idStr_ << "I am not the leader"; return nebula::cpp2::ErrorCode::E_LEADER_CHANGED; } if (peer == addr_) { - LOG(INFO) << idStr_ << "I am the leader"; + VLOG(2) << idStr_ << "I am the leader"; return nebula::cpp2::ErrorCode::SUCCEEDED; } for (auto& host : hosts_) { if (host->addr_ == peer) { if (host->followerCommittedLogId_ == 0 || host->followerCommittedLogId_ < wal_->firstLogId()) { - LOG(INFO) << idStr_ << "The committed log id of peer is " << host->followerCommittedLogId_ - << ", which is invalid or less than my first wal log id"; + VLOG(2) << idStr_ << "The committed log id of peer is " << host->followerCommittedLogId_ + << ", which is invalid or less than my first wal log id"; return nebula::cpp2::ErrorCode::E_RAFT_SENDING_SNAPSHOT; } return host->sendingSnapshot_ ? nebula::cpp2::ErrorCode::E_RAFT_SENDING_SNAPSHOT @@ -2003,15 +1997,15 @@ void RaftPart::checkAndResetPeers(const std::vector& peers) { // To avoid the iterator invalid, we use another container for it. 
decltype(hosts_) hosts = hosts_; for (auto& h : hosts) { - LOG(INFO) << idStr_ << "Check host " << h->addr_; + VLOG(1) << idStr_ << "Check host " << h->addr_; auto it = std::find(peers.begin(), peers.end(), h->addr_); if (it == peers.end()) { - LOG(INFO) << idStr_ << "The peer " << h->addr_ << " should not exist in my peers"; + VLOG(1) << idStr_ << "The peer " << h->addr_ << " should not exist in my peers"; removePeer(h->addr_); } } for (auto& p : peers) { - LOG(INFO) << idStr_ << "Add peer " << p << " if not exist!"; + VLOG(1) << idStr_ << "Add peer " << p << " if not exist!"; addPeer(p); } } @@ -2021,18 +2015,19 @@ void RaftPart::checkRemoteListeners(const std::set& expected) { for (const auto& host : actual) { auto it = std::find(expected.begin(), expected.end(), host); if (it == expected.end()) { - LOG(INFO) << idStr_ << "The listener " << host << " should not exist in my peers"; + VLOG(1) << idStr_ << "The listener " << host << " should not exist in my peers"; removeListenerPeer(host); } } for (const auto& host : expected) { auto it = std::find(actual.begin(), actual.end(), host); if (it == actual.end()) { - LOG(INFO) << idStr_ << "Add listener " << host << " to my peers"; + VLOG(1) << idStr_ << "Add listener " << host << " to my peers"; addListenerPeer(host); } } } + bool RaftPart::leaseValid() { std::lock_guard g(raftLock_); if (hosts_.empty()) { diff --git a/src/kvstore/raftex/RaftPart.h b/src/kvstore/raftex/RaftPart.h index 8cfaa55d81f..156724ef49c 100644 --- a/src/kvstore/raftex/RaftPart.h +++ b/src/kvstore/raftex/RaftPart.h @@ -34,6 +34,14 @@ class FileBasedWal; namespace raftex { +/** + * @brief Log type of raft log + * NORMAL: Normal log could be in any position of a batch + * ATOMIC_OP: Atomic op should be the first log in a batch (a batch with only one atomic op log is + legal as well), there won't be more than one atomic op log in the same batch + * COMMAND: Command should only be the last log in a batch (a batch with only one command log is + legal 
as well), there won't be more than one command log in the same batch + */ enum class LogType { NORMAL = 0x00, ATOMIC_OP = 0x01, @@ -69,84 +77,169 @@ class RaftPart : public std::enable_shared_from_this { public: virtual ~RaftPart(); + /** + * @brief Return whether RaftPart is running + */ bool isRunning() const { std::lock_guard g(raftLock_); return status_ == Status::RUNNING; } + /** + * @brief Return whether RaftPart is stopped + */ bool isStopped() const { std::lock_guard g(raftLock_); return status_ == Status::STOPPED; } + /** + * @brief Return whether RaftPart is leader + */ bool isLeader() const { std::lock_guard g(raftLock_); return role_ == Role::LEADER; } + /** + * @brief Return whether RaftPart is follower + */ bool isFollower() const { std::lock_guard g(raftLock_); return role_ == Role::FOLLOWER; } + /** + * @brief Return whether RaftPart is learner + */ bool isLearner() const { std::lock_guard g(raftLock_); return role_ == Role::LEARNER; } + /** + * @brief Return the cluster id of RaftPart + */ ClusterID clusterId() const { return clusterId_; } + /** + * @brief Return the space id of RaftPart + */ GraphSpaceID spaceId() const { return spaceId_; } + /** + * @brief Return the part id of RaftPart + */ PartitionID partitionId() const { return partId_; } + /** + * @brief Return the address of RaftPart + */ const HostAddr& address() const { return addr_; } + /** + * @brief Return the leader address of RaftPart + */ HostAddr leader() const { std::lock_guard g(raftLock_); return leader_; } + /** + * @brief Return the term of RaftPart + */ TermID termId() const { return term_; } + /** + * @brief Return the wal + */ std::shared_ptr wal() const { return wal_; } + /** + * @brief Add a raft learner to its peers + * + * @param learner Learner address + */ void addLearner(const HostAddr& learner); + /** + * @brief When commit to state machine, old leader will step down as follower + * + * @param target Target new leader + */ void commitTransLeader(const HostAddr&
target); + /** + * @brief Pre-process of transfer leader, target new leader will start election task to background + * worker + * + * @param target Target new leader + */ void preProcessTransLeader(const HostAddr& target); + /** + * @brief Pre-process of remove a host from peers, follower will remove the peer in + * preProcessRemovePeer, leader will remove in commitRemovePeer + * + * @param peer Target peer to remove + */ void preProcessRemovePeer(const HostAddr& peer); + /** + * @brief Commit of remove a host from peers, follower will remove the peer in + * preProcessRemovePeer, leader will remove in commitRemovePeer + * + * @param peer Target peer to remove + */ void commitRemovePeer(const HostAddr& peer); + // All learner and listener are raft learner. The difference between listener and learner is that + // learner could be promoted to follower, but listener could not. (learner are added to hosts_, + // but listener are added to listeners_) + // todo(doodle): separate learner and listener into different raft role + /** + * @brief Add listener peer. + * + * @param peer Listener address + */ void addListenerPeer(const HostAddr& peer); + /** + * @brief Remove listener peer + * + * @param peer Listener address + */ void removeListenerPeer(const HostAddr& peer); - // Change the partition status to RUNNING. This is called - // by the inherited class, when it's ready to serve + /** + * @brief Change the partition status to RUNNING. This is called by the inherited class, when it's + * ready to serve + * + * @param peers All raft peers to add + * @param asLearner Whether start as raft learner + */ virtual void start(std::vector&& peers, bool asLearner = false); - // Change the partition status to STOPPED. This is called - // by the inherited class, when it's about to stop + /** + * @brief Change the partition status to STOPPED. 
This is called by the inherited class, when it's + * about to stop + */ virtual void stop(); - /***************************************************************** - * Asynchronously append a log + /** + * @brief Asynchronously append a log * * This is the **PUBLIC** Log Append API, used by storage * service @@ -158,34 +251,58 @@ class RaftPart : public std::enable_shared_from_this { * be fulfilled * * If the source == -1, the current clusterId will be used - ****************************************************************/ + * + * @param source Cluster id + * @param log Log message to append + * @return folly::Future + */ folly::Future appendAsync(ClusterID source, std::string log); - /**************************************************************** - * Run the op atomically. - ***************************************************************/ + /** + * @brief Trigger the op atomically. If the atomic operation succeed, and append the output log + * message of ATOMIC_OP type + * + * @param op Atomic operation, will output a log if succeed + * @return folly::Future + */ folly::Future atomicOpAsync(AtomicOp op); /** - * Asynchronously send one command. - * */ + * @brief Send a log of COMMAND type + * + * @param log Command log + * @return folly::Future + */ folly::Future sendCommandAsync(std::string log); /** - * Check if the peer has catched up data from leader. If leader is sending the + * @brief Check if the peer has catched up data from leader. If leader is sending the * snapshot, the method will return false. 
- * */ + * + * @param peer The peer to check if it has catched up + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode isCatchedUp(const HostAddr& peer); + /** + * @brief Hard link the wal files to a new path + * + * @param newPath New wal path + * @return Whether link succeed + */ bool linkCurrentWAL(const char* newPath); /** - * Reset my peers if not equals the argument + * @brief Reset my peers if not equals the argument + * + * @param peers Expected peers */ void checkAndResetPeers(const std::vector& peers); /** - * Add listener into peers or remove from peers + * @brief Check whether my remote listeners are equal to the expected ones, add/remove if necessary + * + * @param listeners Expected remote listeners */ void checkRemoteListeners(const std::set& listeners); @@ -194,46 +311,114 @@ class RaftPart : public std::enable_shared_from_this { * Methods to process incoming raft requests * ****************************************************/ + + /** + * @brief Fill the response with the current state of this RaftPart + * + * @param resp Response to fill + */ void getState(cpp2::GetStateResponse& resp); - // Process the incoming leader election request + /** + * @brief Process the incoming leader election request + * + * @param req + * @param resp + */ void processAskForVoteRequest(const cpp2::AskForVoteRequest& req, cpp2::AskForVoteResponse& resp); - // Process appendLog request + /** + * @brief Process append log request + * + * @param req + * @param resp + */ void processAppendLogRequest(const cpp2::AppendLogRequest& req, cpp2::AppendLogResponse& resp); - // Process sendSnapshot request + /** + * @brief Process send snapshot request + * + * @param req + * @param resp + */ void processSendSnapshotRequest(const cpp2::SendSnapshotRequest& req, cpp2::SendSnapshotResponse& resp); + /** + * @brief Process heartbeat request + * + * @param req + * @param resp + */ void processHeartbeatRequest(const cpp2::HeartbeatRequest& req,
cpp2::HeartbeatResponse& resp); + /** + * @brief Return whether leader lease is still valid + */ bool leaseValid(); + /** + * @brief Return whether we need to clean expired wal + */ bool needToCleanWal(); - // leader + followers + /** + * @brief Get the address of node which has the partition, local address and all peers address + * + * @return std::vector local address and all peers address + */ std::vector peers() const; + /** + * @brief All listeners address + * + * @return std::set + */ std::set listeners() const; + /** + * @brief Return last log id and last log term in wal + * + * @return std::pair Pair of last log id and last log term in wal + */ std::pair lastLogInfo() const; - // Reset the part, clean up all data and WALs. + /** + * @brief Reset the part, clean up all data and WALs. + */ void reset(); + /** + * @brief Execution time of some operation, for statistics + * + * @return uint64_t Time in us + */ uint64_t execTime() const { return execTime_; } protected: - // Protected constructor to prevent from instantiating directly + /** + * @brief Construct a new RaftPart. Protected to prevent instantiating it directly + * + * @param clusterId + * @param spaceId + * @param partId + * @param localAddr Local ip/addr of this raft part + * @param walPath Wal path of this raft part + * @param ioPool IOThreadPool for this raft part + * @param workers Background thread for this raft part + * @param executor Worker thread for this raft part + * @param snapshotMan Snapshot manager + * @param clientMan Client manager + * @param diskMan Disk manager + */ RaftPart(ClusterID clusterId, GraphSpaceID spaceId, PartitionID partId, HostAddr localAddr, - const folly::StringPiece walRoot, - std::shared_ptr pool, + const folly::StringPiece walPath, + std::shared_ptr ioPool, std::shared_ptr workers, std::shared_ptr executor, std::shared_ptr snapshotMan, @@ -243,58 +428,121 @@ class RaftPart : public std::enable_shared_from_this { using Status = cpp2::Status; using Role = cpp2::Role; + /** + * @brief The str of the RaftPart, used in logging + */ const char* idStr()
const { return idStr_.c_str(); } - // The method will be invoked by start() - // - // Inherited classes should implement this method to provide the last - // committed log id + /** + * @brief Inherited classes should implement this method to provide the last commit log id and + * last commit log term. The method will be invoked by start() + * + * @return std::pair Last commit log id and last commit log term + */ virtual std::pair lastCommittedLogId() = 0; - // This method is called when this partition's leader term - // is finished, either by receiving a new leader election - // request, or a new leader heartbeat + /** + * @brief This method is called when this partition's leader term is finished, either by receiving + * a new leader election request, or a new leader heartbeat + * + * @param term New term from peers + */ virtual void onLostLeadership(TermID term) = 0; - // This method is called when this partition is elected as - // a new leader + /** + * @brief This method is called when this partition is elected as a new leader + * + * @param term Term when elected as leader + */ virtual void onElected(TermID term) = 0; - // called after leader committed first log - // (a little bit later onElected) - // leader need to set some internal status after elected. + /** + * @brief This method is called after leader committed first log (a little bit later onElected), + * leader need to set some internal status after elected. 
+ * + * @param term + */ virtual void onLeaderReady(TermID term) = 0; + /** + * @brief Callback when a raft node discovers a new leader + * + * @param nLeader New leader's address + */ virtual void onDiscoverNewLeader(HostAddr nLeader) = 0; - // Check if we can accept candidate's message + /** + * @brief Check if we can accept candidate's message + * + * @param candidate The sender of the message + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode checkPeer(const HostAddr& candidate); - // The inherited classes need to implement this method to commit a batch of log messages. - // Return {error code, last commit log id, last commit log term}. - // When no logs applied to state machine or error occurs when calling commitLogs, - // kNoCommitLogId and kNoCommitLogTerm are returned. + /** + * @brief The inherited classes need to implement this method to commit a batch of log messages. + * + * @param iter Log iterator of all logs to commit + * @param wait Whether wait until all logs have been applied to state machine + * @return std::tuple + * Return {error code, last commit log id, last commit log term}. When no logs applied to state + * machine or error occurs when calling commitLogs, kNoCommitLogId and kNoCommitLogTerm are + * returned. + */ virtual std::tuple commitLogs( std::unique_ptr iter, bool wait) = 0; + /** + * @brief An interface to pre-process wal, mainly for membership change + * + * @param logId Log id to pre-process + * @param termId Log term to pre-process + * @param clusterId Cluster id in wal + * @param log Log message in wal + * @return True if succeed. False if failed. + */ virtual bool preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const std::string& log) = 0; - // Return committed; + /** + * @brief If raft node falls way too far behind the leader, the leader will send all its data in + * snapshot by batch, derived classes need to implement this method to apply the batch to state + * machine.
+ * + * @param data Data to apply + * @param committedLogId Commit log id of snapshot + * @param committedLogTerm Commit log term of snapshot + * @param finished Whether snapshot is finished + * @return std::pair Return count and size of the data + */ virtual std::pair commitSnapshot(const std::vector& data, LogID committedLogId, TermID committedLogTerm, bool finished) = 0; - // Clean up extra data about the part, usually related to state machine + /** + * @brief Clean up extra data about the partition, usually related to state machine + * + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode cleanup() = 0; + /** + * @brief Add a host to my peers + * + * @param peer Address to add + */ void addPeer(const HostAddr& peer); + /** + * @brief Remove a host from my peers + * + * @param peer Address to remove + */ void removePeer(const HostAddr& peer); private: @@ -313,71 +561,168 @@ class RaftPart : public std::enable_shared_from_this { * Private methods * ***************************************************/ + + /** + * @brief Return the role in string + * + * @param role Raft role + * @return const char* + */ const char* roleStr(Role role) const; + /** + * @brief Verify if the request can be accepted when receiving an AppendLog or Heartbeat request + * + * @tparam REQ AppendLogRequest or HeartbeatRequest + * @param req RPC request + * @return nebula::cpp2::ErrorCode + */ template nebula::cpp2::ErrorCode verifyLeader(const REQ& req); - /***************************************************************** - * - * Asynchronously send a heartbeat - * - ****************************************************************/ - void sendHeartbeat(); - /**************************************************** * * Methods used by the status polling logic * ***************************************************/ + + /** + * @brief Polling to check some status + * + * @param startTime Start time of the RaftPart, only used in test case + */ + void statusPolling(int64_t
startTime); + + /** + * @brief Return whether need to send heartbeat + */ bool needToSendHeartbeat(); - bool needToStartElection(); + /** + * @brief Asynchronously send a heartbeat + */ + void sendHeartbeat(); - void statusPolling(int64_t startTime); + /** + * @brief Return whether need to trigger leader election + */ + bool needToStartElection(); + /** + * @brief Return whether need to clean snapshot when a node has not received the snapshot for a + * period of time + */ bool needToCleanupSnapshot(); + /** + * @brief Clean up the outdated snapshot + */ void cleanupSnapshot(); - // The method sends out AskForVote request - // Return true if I have been granted majority votes on proposedTerm, no matter isPreVote or not + /** + * @brief The method sends out AskForVote request. Return true if I have been granted majority + * votes on proposedTerm, no matter isPreVote or not + * + * @param isPreVote Whether this is a pre-vote + * @return folly::Future Whether get majority votes + */ folly::Future leaderElection(bool isPreVote); - // The method will fill up the request object and return TRUE - // if the election should continue. Otherwise the method will - // return FALSE + /** + * @brief The method will fill up the request object and return TRUE if the election should + * continue. 
Otherwise the method will return FALSE + * + * @param req The request to send + * @param hosts Raft peers + * @param isPreVote Whether this is a pre-vote + * @return Whether we have a valid request + */ bool prepareElectionRequest(cpp2::AskForVoteRequest& req, std::vector>& hosts, bool isPreVote); - // Return true if I have been granted majority votes on proposedTerm, no matter isPreVote or not + /** + * @brief Handle the leader election responses + * + * @param resps Leader election response + * @param hosts Raft peers + * @param proposedTerm Which term I proposed to be leader + * @param isPreVote Whether this is a pre-vote + * @return Return true if I have been granted majority votes on proposedTerm, no matter isPreVote + * or not + * + */ bool handleElectionResponses(const ElectionResponses& resps, const std::vector>& hosts, TermID proposedTerm, bool isPreVote); - // Return true if I have been granted majority votes on proposedTerm, no matter isPreVote or not + /** + * @brief Check if have been granted from majority peers, no matter isPreVote or not. Convert to + * leader if it is a formal election, and I have received majority votes. 
+ * + * @param results Leader election response + * @param hosts Raft peers + * @param proposedTerm Which term I proposed to be leader + * @param isPreVote Whether this is a pre-vote + * @return Return true if I have been granted majority votes on proposedTerm, no matter isPreVote + * or not + */ bool processElectionResponses(const ElectionResponses& results, std::vector> hosts, TermID proposedTerm, bool isPreVote); - // Check whether new logs can be appended - // Pre-condition: The caller needs to hold the raftLock_ + /** + * @brief Check whether new logs can be appended + * @pre The caller needs to hold the raftLock_ + * + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode canAppendLogs(); - // Also check if term has changed - // Pre-condition: The caller needs to hold the raftLock_ + /** + * @brief Check if term has changed, and check if new logs can be appended + * @pre The caller needs to hold the raftLock_ + * + * @param currTerm Current term + * @return nebula::cpp2::ErrorCode + */ nebula::cpp2::ErrorCode canAppendLogs(TermID currTerm); + /** + * @brief The main interfaces to append log + * + * @param source Log cluster id + * @param logType Log type + * @param log Log message + * @param cb Callback when log is replicated + * @return folly::Future + */ folly::Future appendLogAsync(ClusterID source, LogType logType, std::string log, AtomicOp cb = nullptr); + /** + * @brief Append the logs in iterator + * + * @param iter Log iterator to replicate + * @param termId The term when building the iterator + */ void appendLogsInternal(AppendLogsIterator iter, TermID termId); + /** + * @brief Replicate the logs to peers by sending RPC + * + * @param eb The eventbase to send request + * @param iter Log iterator to send + * @param currTerm The term when building the iterator + * @param lastLogId The last log id in iterator + * @param committedId The commit log id + * @param prevLogTerm The last log term which has been sent + * @param prevLogId The last log 
id which has been sent + */ void replicateLogs(folly::EventBase* eb, AppendLogsIterator iter, TermID currTerm, @@ -386,6 +731,19 @@ class RaftPart : public std::enable_shared_from_this { TermID prevLogTerm, LogID prevLogId); + /** + * @brief Handle the log append response, apply to state machine if necessary + * + * @param resps Responses of peers + * @param eb The eventbase when sent request, used for retry and continue as well + * @param iter Log iterator, also used to continue replicating remaining logs + * @param currTerm The term when building the iterator + * @param lastLogId The last log id in iterator + * @param committedId The commit log id + * @param prevLogTerm The last log term which has been sent + * @param prevLogId The last log id which has been sent + * @param hosts The Host of raft peers + */ void processAppendLogResponses(const AppendLogResponses& resps, folly::EventBase* eb, AppendLogsIterator iter, @@ -396,12 +754,24 @@ class RaftPart : public std::enable_shared_from_this { LogID prevLogId, std::vector> hosts); - // followers return Host of which could vote, in other words, learner is not - // counted in + /** + * @brief Return Host of which could vote, in other words, learner is not counted in + * + * @return std::vector> + */ std::vector> followers() const; + /** + * @brief Check if we succeed in append log. + * + * @param res Errorcode to check + * @return Return true directly if res is succeed, otherwise set the failed status.
+ */ bool checkAppendLogResult(nebula::cpp2::ErrorCode res); + /** + * @brief Update raft quorum when membership changes + */ void updateQuorum(); protected: @@ -417,12 +787,20 @@ class RaftPart : public std::enable_shared_from_this { PromiseSet& operator=(const PromiseSet&) = delete; PromiseSet& operator=(PromiseSet&& right) = default; + /** + * @brief Clean all promises + */ void reset() { sharedPromises_.clear(); singlePromises_.clear(); rollSharedPromise_ = true; } + /** + * @brief Used for NORMAL raft log + * + * @return folly::Future + */ folly::Future getSharedFuture() { if (rollSharedPromise_) { sharedPromises_.emplace_back(); @@ -432,6 +810,11 @@ class RaftPart : public std::enable_shared_from_this { return sharedPromises_.back().getFuture(); } + /** + * @brief Used for ATOMIC_OP raft log + * + * @return folly::Future + */ folly::Future getSingleFuture() { singlePromises_.emplace_back(); rollSharedPromise_ = true; @@ -439,6 +822,11 @@ class RaftPart : public std::enable_shared_from_this { return singlePromises_.back().getFuture(); } + /** + * @brief Used for COMMAND raft log + * + * @return folly::Future + */ folly::Future getAndRollSharedFuture() { if (rollSharedPromise_) { sharedPromises_.emplace_back(); @@ -447,6 +835,12 @@ class RaftPart : public std::enable_shared_from_this { return sharedPromises_.back().getFuture(); } + /** + * @brief Set shared promise + * + * @tparam VT + * @param val + */ template void setOneSharedValue(VT&& val) { CHECK(!sharedPromises_.empty()); @@ -454,6 +848,12 @@ class RaftPart : public std::enable_shared_from_this { sharedPromises_.pop_front(); } + /** + * @brief Set single promise + * + * @tparam VT + * @param val + */ template void setOneSingleValue(VT&& val) { CHECK(!singlePromises_.empty()); @@ -461,6 +861,11 @@ class RaftPart : public std::enable_shared_from_this { singlePromises_.pop_front(); } + /** + * @brief Set all promises to result, usually a failed result + * + * @param val + */ void setValue(ValueType val) { 
for (auto& p : sharedPromises_) { p.setValue(val); diff --git a/src/kvstore/raftex/RaftexService.cpp b/src/kvstore/raftex/RaftexService.cpp index 16ffda8b6ff..6ac42c60b76 100644 --- a/src/kvstore/raftex/RaftexService.cpp +++ b/src/kvstore/raftex/RaftexService.cpp @@ -176,8 +176,7 @@ std::shared_ptr RaftexService::findPart(GraphSpaceID spaceId, Partitio auto it = parts_.find(std::make_pair(spaceId, partId)); if (it == parts_.end()) { // Part not found - LOG_EVERY_N(WARNING, 100) << "Cannot find the part " << partId << " in the graph space " - << spaceId; + VLOG(4) << "Cannot find the part " << partId << " in the graph space " << spaceId; return std::shared_ptr(); } diff --git a/src/kvstore/raftex/RaftexService.h b/src/kvstore/raftex/RaftexService.h index 888da5caddd..74d9245b7bf 100644 --- a/src/kvstore/raftex/RaftexService.h +++ b/src/kvstore/raftex/RaftexService.h @@ -18,52 +18,153 @@ namespace raftex { class RaftPart; class IOThreadPoolObserver; +/** + * @brief Class to handle raft thrift server, also distribute request to RaftPart. 
+ * Only heartbeat is processed in io thread, other requests are processed in worker thread + */ class RaftexService : public cpp2::RaftexServiceSvIf { public: + /** + * @brief Create a raft service + * + * @param pool IOThreadPool to use + * @param workers Worker thread pool to use + * @param port Listen port of thrift server + * @return std::shared_ptr + */ static std::shared_ptr createService( std::shared_ptr pool, std::shared_ptr workers, uint16_t port = 0); + + /** + * @brief Destroy the Raftex Service + */ virtual ~RaftexService(); + /** + * @brief Return the raft thrift server port + */ uint32_t getServerPort() const { return serverPort_; } + /** + * @brief Get the io thread pool + * + * @return std::shared_ptr + */ std::shared_ptr getIOThreadPool() const; + /** + * @brief Get the worker thread + * + * @return std::shared_ptr + */ std::shared_ptr getThreadManager(); + /** + * @brief Start the raft thrift server + * + * @return Whether start succeed + */ bool start(); + + /** + * @brief Set the state to stopped + */ void stop(); + + /** + * @brief Wait until the thrift server has been stopped + */ void waitUntilStop(); + /** + * @brief Handle leader election request in worker thread + * + * @param resp + * @param req + */ void askForVote(cpp2::AskForVoteResponse& resp, const cpp2::AskForVoteRequest& req) override; + /** + * @brief Get the raft part state of given partition + * + * @param resp + * @param req + */ void getState(cpp2::GetStateResponse& resp, const cpp2::GetStateRequest& req) override; + /** + * @brief Handle append log request in worker thread + * + * @param resp + * @param req + */ void appendLog(cpp2::AppendLogResponse& resp, const cpp2::AppendLogRequest& req) override; + /** + * @brief Handle send snapshot request in worker thread + * + * @param resp + * @param req + */ void sendSnapshot(cpp2::SendSnapshotResponse& resp, const cpp2::SendSnapshotRequest& req) override; + /** + * @brief Handle heartbeat request in io thread + * + * @param 
callback Thrift callback + * @param req + */ void async_eb_heartbeat( std::unique_ptr> callback, const cpp2::HeartbeatRequest& req) override; + /** + * @brief Register the RaftPart to the service + */ void addPartition(std::shared_ptr part); + + /** + * @brief Unregister the RaftPart to the service + */ void removePartition(std::shared_ptr part); + /** + * @brief Find the RaftPart by spaceId and partId + * + * @param spaceId + * @param partId + * @return std::shared_ptr + */ std::shared_ptr findPart(GraphSpaceID spaceId, PartitionID partId); private: + /** + * @brief Start the thrift server + * + * @param pool IO thread pool + * @param workers Worker thread pool + * @param port Thrift port to listener + */ void initThriftServer(std::shared_ptr pool, std::shared_ptr workers, uint16_t port = 0); + + /** + * @brief Prepare the setup of thrift server + * + * @return Return whether succeed + */ bool setup(); void serve(); - // Block until the service is ready to serve + /** + * @brief Wait until the service is ready to serve + */ void waitUntilReady(); RaftexService() = default; diff --git a/src/kvstore/raftex/SnapshotManager.cpp b/src/kvstore/raftex/SnapshotManager.cpp index f6e07a90a99..0182d46a3ad 100644 --- a/src/kvstore/raftex/SnapshotManager.cpp +++ b/src/kvstore/raftex/SnapshotManager.cpp @@ -40,9 +40,9 @@ folly::Future>> SnapshotManager::sendSnapshot( auto commitLogIdAndTerm = part->lastCommittedLogId(); const auto& localhost = part->address(); std::vector> results; - LOG(INFO) << part->idStr_ << "Begin to send the snapshot to the host " << dst - << ", commitLogId = " << commitLogIdAndTerm.first - << ", commitLogTerm = " << commitLogIdAndTerm.second; + VLOG(1) << part->idStr_ << "Begin to send the snapshot to the host " << dst + << ", commitLogId = " << commitLogIdAndTerm.first + << ", commitLogTerm = " << commitLogIdAndTerm.second; accessAllRowsInSnapshot( spaceId, partId, @@ -51,7 +51,7 @@ folly::Future>> SnapshotManager::sendSnapshot( int64_t totalSize, 
SnapshotStatus status) mutable -> bool { if (status == SnapshotStatus::FAILED) { - LOG(INFO) << part->idStr_ << "Snapshot send failed, the leader changed?"; + VLOG(1) << part->idStr_ << "Snapshot send failed, the leader changed?"; p.setValue(Status::Error("Send snapshot failed!")); return false; } @@ -73,28 +73,28 @@ folly::Future>> SnapshotManager::sendSnapshot( try { auto resp = std::move(f).get(); if (resp.get_error_code() == nebula::cpp2::ErrorCode::SUCCEEDED) { - VLOG(1) << part->idStr_ << "has sended count " << totalCount; + VLOG(3) << part->idStr_ << "has sended count " << totalCount; if (status == SnapshotStatus::DONE) { - LOG(INFO) << part->idStr_ << "Finished, totalCount " << totalCount - << ", totalSize " << totalSize; + VLOG(1) << part->idStr_ << "Finished, totalCount " << totalCount << ", totalSize " + << totalSize; p.setValue(commitLogIdAndTerm); } return true; } else { - LOG(INFO) << part->idStr_ << "Sending snapshot failed, we don't retry anymore! " - << "The error code is " - << apache::thrift::util::enumNameSafe(resp.get_error_code()); + VLOG(2) << part->idStr_ << "Sending snapshot failed, we don't retry anymore! 
" + << "The error code is " + << apache::thrift::util::enumNameSafe(resp.get_error_code()); p.setValue(Status::Error("Send snapshot failed!")); return false; } } catch (const std::exception& e) { - LOG(ERROR) << part->idStr_ << "Send snapshot failed, exception " << e.what() - << ", retry " << retry << " times"; + VLOG(3) << part->idStr_ << "Send snapshot failed, exception " << e.what() + << ", retry " << retry << " times"; sleep(1); continue; } } - LOG(WARNING) << part->idStr_ << "Send snapshot failed!"; + VLOG(2) << part->idStr_ << "Send snapshot failed!"; p.setValue(Status::Error("Send snapshot failed!")); return false; }); @@ -114,7 +114,7 @@ folly::Future SnapshotManager::send( int64_t totalCount, const HostAddr& addr, bool finished) { - VLOG(2) << "Send snapshot request to " << addr; + VLOG(4) << "Send snapshot request to " << addr; raftex::cpp2::SendSnapshotRequest req; req.space_ref() = spaceId; req.part_ref() = partId; diff --git a/src/kvstore/raftex/SnapshotManager.h b/src/kvstore/raftex/SnapshotManager.h index de613caaa6f..fc082708c46 100644 --- a/src/kvstore/raftex/SnapshotManager.h +++ b/src/kvstore/raftex/SnapshotManager.h @@ -37,11 +37,34 @@ class SnapshotManager { SnapshotManager(); virtual ~SnapshotManager() = default; - // Send snapshot for spaceId, partId to host dst. + /** + * @brief Send snapshot for spaceId, partId to host dst. 
+ * + * @param part The RaftPart + * @param dst The address of target peer + * @return folly::Future>> Future of snapshot result, return the + * commit log id and commit log term if succeed + */ folly::Future>> sendSnapshot(std::shared_ptr part, const HostAddr& dst); private: + /** + * @brief Send the snapshot in batch + * + * @param spaceId + * @param partId + * @param termId Current term of RaftPart + * @param committedLogId The commit log id of snapshot + * @param committedLogTerm The commit log term of snapshot + * @param localhost Local address + * @param data The key/value to send + * @param totalSize The key/value has been sent in bytes + * @param totalCount Count of key/value has been sent + * @param addr Address of target peer + * @param finished Whether this is the last batch of snapshot + * @return folly::Future + */ folly::Future send(GraphSpaceID spaceId, PartitionID partId, TermID termId, @@ -54,6 +77,13 @@ class SnapshotManager { const HostAddr& addr, bool finished); + /** + * @brief Interface to scan data, and trigger callback to send them + * + * @param spaceId + * @param partId + * @param cb Callback to send data + */ virtual void accessAllRowsInSnapshot(GraphSpaceID spaceId, PartitionID partId, SnapshotCallback cb) = 0; diff --git a/src/kvstore/raftex/test/TestShard.cpp b/src/kvstore/raftex/test/TestShard.cpp index ef3e58a33a0..99a2c20f083 100644 --- a/src/kvstore/raftex/test/TestShard.cpp +++ b/src/kvstore/raftex/test/TestShard.cpp @@ -194,7 +194,7 @@ std::tuple TestShard::commitLogs( folly::RWSpinLock::WriteHolder wh(&lock_); currLogId_ = iter->logId(); data_.emplace_back(currLogId_, log.toString()); - VLOG(1) << idStr_ << "Write: " << log << ", LogId: " << currLogId_ + VLOG(2) << idStr_ << "Write: " << log << ", LogId: " << currLogId_ << " state machine log size: " << data_.size(); break; } @@ -225,7 +225,7 @@ std::pair TestShard::commitSnapshot(const std::vector end_) { + valid_ = false; + currRec_ = nullptr; + return *this; + } + // 
Operations after load SHOULD NOT reorder before it. + auto pos = currNode_->pos_.load(std::memory_order_acquire); + VLOG(5) << "currNode firstLogId = " << currNode_->firstLogId_ << ", currIndex = " << currIndex_ + << ", currNode pos " << pos; + if (currIndex_ >= pos) { + currNode_ = currNode_->prev_.load(std::memory_order_relaxed); + if (currNode_ == nullptr) { + valid_ = false; + currRec_ = nullptr; + return *this; + } else { + currIndex_ = 0; + } + } + DCHECK_LT(currIndex_, kMaxLength); + currRec_ = DCHECK_NOTNULL(currNode_)->rec(currIndex_); + return *this; +} + +void AtomicLogBuffer::Iterator::seek(LogID logId) { + currNode_ = logBuffer_->seek(logId); + if (currNode_ != nullptr) { + currIndex_ = logId - currNode_->firstLogId_; + // Since reader is only a snapshot, a possible case is that logId > currNode->firstLogId_, + // however, the logId we search may not in currNode. (e.g. currNode_ is the latest node, + // but currIndex_ >= kMaxLength). In this case, currRec_ will be an invalid one. 
+ currRec_ = currNode_->rec(currIndex_); + valid_ = (currRec_ != nullptr); + } else { + valid_ = false; + } +} + +AtomicLogBuffer::~AtomicLogBuffer() { + auto refs = refs_.load(std::memory_order_acquire); + CHECK_EQ(0, refs); + auto* curr = head_.load(std::memory_order_relaxed); + auto* prev = curr; + while (curr != nullptr) { + curr = curr->next_; + delete prev; + prev = curr; + } + if (prev != nullptr) { + delete prev; + } +} + +void AtomicLogBuffer::push(LogID logId, Record&& record) { + auto* head = head_.load(std::memory_order_relaxed); + auto recSize = record.size(); + if (head == nullptr || head->isFull() || head->markDeleted_.load(std::memory_order_relaxed)) { + auto* newNode = new Node(); + newNode->firstLogId_ = logId; + newNode->next_ = head; + newNode->push_back(std::move(record)); + if (head == nullptr || head->markDeleted_.load(std::memory_order_relaxed)) { + // It is the first Node in current list, or head has been marked as + // deleted + firstLogId_.store(logId, std::memory_order_relaxed); + tail_.store(newNode, std::memory_order_relaxed); + } else if (head != nullptr) { + head->prev_.store(newNode, std::memory_order_release); + } + size_.fetch_add(recSize, std::memory_order_relaxed); + head_.store(newNode, std::memory_order_relaxed); + return; + } + if (size_ + recSize > capacity_) { + auto* tail = tail_.load(std::memory_order_relaxed); + // todo(doodle): there is a potential problem is that: since Node::isFull + // is judged by log count, we can only add new node when previous node + // has enough logs. So when tail is equal to head, we need to wait tail is + // full, after head moves forward, at then tail can be marked as deleted. + // So the log buffer would takes up more memory than its capacity. Since + // it does not affect correctness, we could fix it later if necessary. + if (tail != head) { + // We have more than one nodes in current list. + // So we mark the tail to be deleted. 
+ bool expected = false; + VLOG(5) << "Mark node " << tail->firstLogId_ << " to be deleted!"; + auto marked = + tail->markDeleted_.compare_exchange_strong(expected, true, std::memory_order_relaxed); + auto* prev = tail->prev_.load(std::memory_order_relaxed); + firstLogId_.store(prev->firstLogId_, std::memory_order_relaxed); + // All operations above SHOULD NOT be reordered. + tail_.store(tail->prev_, std::memory_order_release); + if (marked) { + size_.fetch_sub(tail->size_, std::memory_order_relaxed); + // dirtyNodes_ changes SHOULD after the tail move. + dirtyNodes_.fetch_add(1, std::memory_order_release); + } + } + } + size_.fetch_add(recSize, std::memory_order_relaxed); + head->push_back(std::move(record)); +} + +void AtomicLogBuffer::reset() { + auto* p = head_.load(std::memory_order_relaxed); + int32_t count = 0; + while (p != nullptr) { + bool expected = false; + if (!p->markDeleted_.compare_exchange_strong(expected, true, std::memory_order_relaxed)) { + // The rest nodes has been mark deleted. + break; + } + p = p->next_; + ++count; + } + size_.store(0, std::memory_order_relaxed); + firstLogId_.store(0, std::memory_order_relaxed); + dirtyNodes_.fetch_add(count, std::memory_order_release); +} + +Node* AtomicLogBuffer::seek(LogID logId) { + auto* head = head_.load(std::memory_order_relaxed); + if (head != nullptr && logId > head->lastLogId()) { + VLOG(5) << "Bad seek, the seeking logId " << logId + << " is greater than the latest logId in buffer " << head->lastLogId(); + return nullptr; + } + auto* p = head; + if (p == nullptr) { + return nullptr; + } + auto* tail = tail_.load(std::memory_order_relaxed); + CHECK_NOTNULL(tail); + // The scan range is [head, tail] + // And we should ensure the nodes inside the range SHOULD NOT be deleted. + // We could ensure the tail during gc is older than current one. 
+ while (p != tail->next_ && !p->markDeleted_) { + VLOG(5) << "current node firstLogId = " << p->firstLogId_ << ", the seeking logId = " << logId; + if (logId >= p->firstLogId_) { + break; + } + p = p->next_; + } + if (p == nullptr) { + return nullptr; + } + return p->markDeleted_ ? nullptr : p; +} + +void AtomicLogBuffer::releaseRef() { + // All operations following SHOULD NOT reordered before tail.load() + // so we could ensure the tail used in GC is older than new coming readers. + auto* tail = tail_.load(std::memory_order_acquire); + auto readers = refs_.fetch_sub(1, std::memory_order_relaxed); + VLOG(5) << "Release ref, readers = " << readers; + // todo(doodle): https://github.com/vesoft-inc/nebula-storage/issues/390 + if (readers > 1) { + return; + } + // In this position, maybe there are some new readers coming in + // So we should load tail before refs count down to ensure the tail current + // thread got is older than the new readers see. + + CHECK_EQ(1, readers); + + auto dirtyNodes = dirtyNodes_.load(std::memory_order_relaxed); + bool gcRunning = false; + + if (dirtyNodes > dirtyNodesLimit_) { + if (gcOnGoing_.compare_exchange_strong(gcRunning, true, std::memory_order_acquire)) { + VLOG(4) << "GC begins!"; + // It means no readers on the deleted nodes. + // Cut-off the list. + CHECK_NOTNULL(tail); + auto* dirtyHead = tail->next_; + tail->next_ = nullptr; + + // Now we begin to delete the nodes. 
+ auto* curr = dirtyHead; + while (curr != nullptr) { + CHECK(curr->markDeleted_.load(std::memory_order_relaxed)); + VLOG(5) << "Delete node " << curr->firstLogId_; + auto* del = curr; + curr = curr->next_; + delete del; + dirtyNodes_.fetch_sub(1, std::memory_order_release); + CHECK_GE(dirtyNodes_, 0); + } + + gcOnGoing_.store(false, std::memory_order_release); + VLOG(4) << "GC finished!"; + } else { + VLOG(5) << "Current list is in gc now!"; + } + } +} + +} // namespace wal +} // namespace nebula diff --git a/src/kvstore/wal/AtomicLogBuffer.h b/src/kvstore/wal/AtomicLogBuffer.h index 9d3d973ffd1..9f150018a0e 100644 --- a/src/kvstore/wal/AtomicLogBuffer.h +++ b/src/kvstore/wal/AtomicLogBuffer.h @@ -17,6 +17,9 @@ namespace wal { constexpr int32_t kMaxLength = 64; +/** + * @brief Wal record in each Node, it is wrapper calls of wal log + */ struct Record { Record() = default; Record(const Record&) = default; @@ -35,13 +38,25 @@ struct Record { std::string msg_; }; +/** + * @brief A node contains fix count of wal + */ struct Node { Node() = default; + /** + * @brief Return current node is full or not + */ bool isFull() { return pos_.load(std::memory_order_acquire) == kMaxLength; } + /** + * @brief Add a record to current node + * + * @param rec Record to add + * @return Whether operation succeed or not + */ bool push_back(Record&& rec) { if (isFull()) { return false; @@ -53,6 +68,12 @@ struct Node { return true; } + /** + * @brief Fetch a record by index + * + * @param index Wal index in node + * @return Record* Wal record if exists + */ Record* rec(int32_t index) { if (UNLIKELY(index >= kMaxLength)) { return nullptr; @@ -63,6 +84,11 @@ struct Node { return &(*records_)[index]; } + /** + * @brief The last wal log id by counting how many wal in current node + * + * @return LogID The last wal log id + */ LogID lastLogId() const { return firstLogId_ + pos_.load(std::memory_order_relaxed); } @@ -85,81 +111,83 @@ struct Node { }; /** - * A wait-free log buffer for single 
writer, multi readers - * When deleting the extra node, to avoid read the dangling one, - * we just mark it to be deleted, and delete it when no readers using it. - * - * For write, most of time, it is o(1) - * For seek, it is o(n), n is the number of nodes inside current list, but in - * most cases, the seeking log is in the head node, so it equals o(1) - * */ + * @brief A wait-free log buffer for single writer, multi readers When deleting the extra node, to + * avoid read the dangling one, we just mark it to be deleted, and delete it when no readers using + * it. For write, most of time, it is o(1) For seek, it is o(n), n is the number of nodes inside + * current list, but in most cases, the seeking log is in the head node, so it equals o(1) + */ class AtomicLogBuffer : public std::enable_shared_from_this { FRIEND_TEST(AtomicLogBufferTest, ResetThenPushExceedLimit); public: /** - * The iterator once created, it could just see the snapshot of current list. - * In other words, the new records inserted during scanning are invisible. - * */ + * @brief The log iterator used in AtomicLogBuffer, all logs are in memory. Once the iterator is + * created, it could just see the snapshot of current list. In other words, the new records + * inserted during scanning are invisible. + */ class Iterator : public LogIterator { friend class AtomicLogBuffer; FRIEND_TEST(AtomicLogBufferTest, SingleWriterMultiReadersTest); public: + /** + * @brief Destroy the iterator, which would trigger gc if necessary + */ ~Iterator() { logBuffer_->releaseRef(); } - LogIterator& operator++() override { - currIndex_++; - currLogId_++; - if (currLogId_ > end_) { - valid_ = false; - currRec_ = nullptr; - return *this; - } - // Operations after load SHOULD NOT reorder before it. 
- auto pos = currNode_->pos_.load(std::memory_order_acquire); - VLOG(3) << "currNode firstLogId = " << currNode_->firstLogId_ - << ", currIndex = " << currIndex_ << ", currNode pos " << pos; - if (currIndex_ >= pos) { - currNode_ = currNode_->prev_.load(std::memory_order_relaxed); - if (currNode_ == nullptr) { - valid_ = false; - currRec_ = nullptr; - return *this; - } else { - currIndex_ = 0; - } - } - DCHECK_LT(currIndex_, kMaxLength); - currRec_ = DCHECK_NOTNULL(currNode_)->rec(currIndex_); - return *this; - } + /** + * @brief Move forward iterator to next wal record + * + * @return LogIterator& + */ + LogIterator& operator++() override; + /** + * @brief Return whether log iterator is valid + */ bool valid() const override { return valid_; } + /** + * @brief Return the log id pointed by current iterator + */ LogID logId() const override { DCHECK(valid_); return currLogId_; } + /** + * @brief Return the log term pointed by current iterator + */ TermID logTerm() const override { return record()->termId_; } + /** + * @brief Return the log source pointed by current iterator + */ ClusterID logSource() const override { return record()->clusterId_; } + /** + * @brief Return the log message pointed by current iterator + */ folly::StringPiece logMsg() const override { return record()->msg_; } private: - // Iterator could only be acquired by AtomicLogBuffer::iterator interface. 
+ /** + * @brief Construct a new wal iterator in range [start, end] + * + * @param logBuffer Related log buffer + * @param start Start log id, inclusive + * @param end End log id, inclusive + */ Iterator(std::shared_ptr logBuffer, LogID start, LogID end) : logBuffer_(logBuffer), currLogId_(start) { logBuffer_->addRef(); @@ -167,6 +195,11 @@ class AtomicLogBuffer : public std::enable_shared_from_this { seek(currLogId_); } + /** + * @brief Return the wal record if valid + * + * @return const Record* The wal record + */ const Record* record() const { if (!valid_) { return nullptr; @@ -174,24 +207,27 @@ class AtomicLogBuffer : public std::enable_shared_from_this { return DCHECK_NOTNULL(currRec_); } - void seek(LogID logId) { - currNode_ = logBuffer_->seek(logId); - if (currNode_ != nullptr) { - currIndex_ = logId - currNode_->firstLogId_; - // Since reader is only a snapshot, a possible case is that logId > currNode->firstLogId_, - // however, the logId we search may not in currNode. (e.g. currNode_ is the latest node, - // but currIndex_ >= kMaxLength). In this case, currRec_ will be an invalid one. 
- currRec_ = currNode_->rec(currIndex_); - valid_ = (currRec_ != nullptr); - } else { - valid_ = false; - } - } - + /** + * @brief Seek the wal by log id, and initialize the iterator if valid + * + * @param logId The wal log id to seek + */ + void seek(LogID logId); + + /** + * @brief Return the current node of pointed by iterator + * + * @return Node* + */ Node* currNode() const { return currNode_; } + /** + * @brief Current index in wal node + * + * @return int32_t + */ int32_t currIndex() const { return currIndex_; } @@ -206,86 +242,53 @@ class AtomicLogBuffer : public std::enable_shared_from_this { Record* currRec_{nullptr}; }; + /** + * @brief Build the AtomicLogBuffer instance + * + * @param capacity Max capacity in bytes, when size exceeds capacity, which would trigger garbage + * collection + * @return std::shared_ptr + */ static std::shared_ptr instance(int32_t capacity = 8 * 1024 * 1024) { return std::shared_ptr(new AtomicLogBuffer(capacity)); } /** - * Users should ensure there are no readers when releasing it. - * */ - ~AtomicLogBuffer() { - auto refs = refs_.load(std::memory_order_acquire); - CHECK_EQ(0, refs); - auto* curr = head_.load(std::memory_order_relaxed); - auto* prev = curr; - while (curr != nullptr) { - curr = curr->next_; - delete prev; - prev = curr; - } - if (prev != nullptr) { - delete prev; - } - } + * @brief Destroy the atomic log buffer, users should ensure there are no readers when + * releasing it. 
+ */ + ~AtomicLogBuffer(); + /** + * @brief Add a wal log to current buffer + * + * @param logId Log id + * @param termId Log term + * @param clusterId Cluster id of log + * @param msg Log message + */ void push(LogID logId, TermID termId, ClusterID clusterId, std::string&& msg) { push(logId, Record(clusterId, termId, std::move(msg))); } - void push(LogID logId, Record&& record) { - auto* head = head_.load(std::memory_order_relaxed); - auto recSize = record.size(); - if (head == nullptr || head->isFull() || head->markDeleted_.load(std::memory_order_relaxed)) { - auto* newNode = new Node(); - newNode->firstLogId_ = logId; - newNode->next_ = head; - newNode->push_back(std::move(record)); - if (head == nullptr || head->markDeleted_.load(std::memory_order_relaxed)) { - // It is the first Node in current list, or head has been marked as - // deleted - firstLogId_.store(logId, std::memory_order_relaxed); - tail_.store(newNode, std::memory_order_relaxed); - } else if (head != nullptr) { - head->prev_.store(newNode, std::memory_order_release); - } - size_.fetch_add(recSize, std::memory_order_relaxed); - head_.store(newNode, std::memory_order_relaxed); - return; - } - if (size_ + recSize > capacity_) { - auto* tail = tail_.load(std::memory_order_relaxed); - // todo(doodle): there is a potential problem is that: since Node::isFull - // is judged by log count, we can only add new node when previous node - // has enough logs. So when tail is equal to head, we need to wait tail is - // full, after head moves forward, at then tail can be marked as deleted. - // So the log buffer would takes up more memory than its capacity. Since - // it does not affect correctness, we could fix it later if necessary. - if (tail != head) { - // We have more than one nodes in current list. - // So we mark the tail to be deleted. 
- bool expected = false; - VLOG(3) << "Mark node " << tail->firstLogId_ << " to be deleted!"; - auto marked = - tail->markDeleted_.compare_exchange_strong(expected, true, std::memory_order_relaxed); - auto* prev = tail->prev_.load(std::memory_order_relaxed); - firstLogId_.store(prev->firstLogId_, std::memory_order_relaxed); - // All operations above SHOULD NOT be reordered. - tail_.store(tail->prev_, std::memory_order_release); - if (marked) { - size_.fetch_sub(tail->size_, std::memory_order_relaxed); - // dirtyNodes_ changes SHOULD after the tail move. - dirtyNodes_.fetch_add(1, std::memory_order_release); - } - } - } - size_.fetch_add(recSize, std::memory_order_relaxed); - head->push_back(std::move(record)); - } + /** + * @brief Add the wal record into current buffer + * + * @param logId Log id + * @param record Log record + */ + void push(LogID logId, Record&& record); + /** + * @brief Return the first log id in buffer + */ LogID firstLogId() const { return firstLogId_.load(std::memory_order_relaxed); } + /** + * @brief Return the last log id in buffer + */ LogID lastLogId() const { auto* p = head_.load(std::memory_order_relaxed); if (p == nullptr) { @@ -295,122 +298,56 @@ class AtomicLogBuffer : public std::enable_shared_from_this { } /** - * For reset operation, users should keep it thread-safe with push operation. - * Just mark all nodes to be deleted. - * - * Actually, we don't follow the invariant strictly (node in range [head, - * tail] are valid), head and tail are not modified. But once an log is pushed - * after reset, everything will obey the invariant. - * */ - void reset() { - auto* p = head_.load(std::memory_order_relaxed); - int32_t count = 0; - while (p != nullptr) { - bool expected = false; - if (!p->markDeleted_.compare_exchange_strong(expected, true, std::memory_order_relaxed)) { - // The rest nodes has been mark deleted. 
- break; - } - p = p->next_; - ++count; - } - size_.store(0, std::memory_order_relaxed); - firstLogId_.store(0, std::memory_order_relaxed); - dirtyNodes_.fetch_add(count, std::memory_order_release); - } + * @brief For reset operation, users should keep it thread-safe with push operation. Just mark all + * nodes to be deleted. Actually, we don't follow the invariant strictly (node in range [head, + * tail] are valid), head and tail are not modified. But once an log is pushed after reset, + * everything will obey the invariant. + */ + void reset(); + /** + * @brief Return the log iterator in range [start, end] + * + * @param start Start log id, inclusive + * @param end End log id, inclusive + * @return std::unique_ptr Log iterator + */ std::unique_ptr iterator(LogID start, LogID end) { std::unique_ptr iter(new Iterator(shared_from_this(), start, end)); return iter; } private: + /** + * @brief Construct a new Atomic Log Buffer object + * + * @param capacity Max capacity in bytes, when size exceeds capacity, which would trigger garbage + * collection + */ explicit AtomicLogBuffer(int32_t capacity) : capacity_(capacity) {} - /* - * Find the non-deleted node contains the logId. - * */ - Node* seek(LogID logId) { - auto* head = head_.load(std::memory_order_relaxed); - if (head != nullptr && logId > head->lastLogId()) { - VLOG(3) << "Bad seek, the seeking logId " << logId - << " is greater than the latest logId in buffer " << head->lastLogId(); - return nullptr; - } - auto* p = head; - if (p == nullptr) { - return nullptr; - } - auto* tail = tail_.load(std::memory_order_relaxed); - CHECK_NOTNULL(tail); - // The scan range is [head, tail] - // And we should ensure the nodes inside the range SHOULD NOT be deleted. - // We could ensure the tail during gc is older than current one. 
- while (p != tail->next_ && !p->markDeleted_) { - VLOG(3) << "current node firstLogId = " << p->firstLogId_ - << ", the seeking logId = " << logId; - if (logId >= p->firstLogId_) { - break; - } - p = p->next_; - } - if (p == nullptr) { - return nullptr; - } - return p->markDeleted_ ? nullptr : p; - } + /** + * @brief Find the node which contains the log with given id + * + * @param logId Log id to seek + * @return Node* Return the node contains the log, return nullptr if not found + */ + Node* seek(LogID logId); + /** + * @brief Add a reference count of how many iterator exists + * + * @return int32_t Reference count + */ int32_t addRef() { return refs_.fetch_add(1, std::memory_order_relaxed); } - void releaseRef() { - // All operations following SHOULD NOT reordered before tail.load() - // so we could ensure the tail used in GC is older than new coming readers. - auto* tail = tail_.load(std::memory_order_acquire); - auto readers = refs_.fetch_sub(1, std::memory_order_relaxed); - VLOG(3) << "Release ref, readers = " << readers; - // todo(doodle): https://github.com/vesoft-inc/nebula-storage/issues/390 - if (readers > 1) { - return; - } - // In this position, maybe there are some new readers coming in - // So we should load tail before refs count down to ensure the tail current - // thread got is older than the new readers see. - - CHECK_EQ(1, readers); - - auto dirtyNodes = dirtyNodes_.load(std::memory_order_relaxed); - bool gcRunning = false; - - if (dirtyNodes > dirtyNodesLimit_) { - if (gcOnGoing_.compare_exchange_strong(gcRunning, true, std::memory_order_acquire)) { - VLOG(1) << "GC begins!"; - // It means no readers on the deleted nodes. - // Cut-off the list. - CHECK_NOTNULL(tail); - auto* dirtyHead = tail->next_; - tail->next_ = nullptr; - - // Now we begin to delete the nodes. 
- auto* curr = dirtyHead; - while (curr != nullptr) { - CHECK(curr->markDeleted_.load(std::memory_order_relaxed)); - VLOG(1) << "Delete node " << curr->firstLogId_; - auto* del = curr; - curr = curr->next_; - delete del; - dirtyNodes_.fetch_sub(1, std::memory_order_release); - CHECK_GE(dirtyNodes_, 0); - } - - gcOnGoing_.store(false, std::memory_order_release); - VLOG(1) << "GC finished!"; - } else { - VLOG(1) << "Current list is in gc now!"; - } - } - } + /** + * @brief Release the node if there are too many dirty nodes + * + */ + void releaseRef(); private: std::atomic head_{nullptr}; diff --git a/src/kvstore/wal/CMakeLists.txt b/src/kvstore/wal/CMakeLists.txt index f0bc0e66539..ad39273f2ed 100644 --- a/src/kvstore/wal/CMakeLists.txt +++ b/src/kvstore/wal/CMakeLists.txt @@ -2,6 +2,7 @@ nebula_add_library( wal_obj OBJECT FileBasedWal.cpp WalFileIterator.cpp + AtomicLogBuffer.cpp ) nebula_add_subdirectory(test) diff --git a/src/kvstore/wal/FileBasedWal.cpp b/src/kvstore/wal/FileBasedWal.cpp index e55f3e72bbc..a10e5f4f7be 100644 --- a/src/kvstore/wal/FileBasedWal.cpp +++ b/src/kvstore/wal/FileBasedWal.cpp @@ -63,8 +63,8 @@ FileBasedWal::FileBasedWal(const folly::StringPiece dir, auto& info = walFiles_.rbegin()->second; lastLogId_ = info->lastId(); lastLogTerm_ = info->lastTerm(); - LOG(INFO) << idStr_ << "lastLogId in wal is " << lastLogId_ << ", lastLogTerm is " - << lastLogTerm_ << ", path is " << info->path(); + VLOG(2) << idStr_ << "lastLogId in wal is " << lastLogId_ << ", lastLogTerm is " << lastLogTerm_ + << ", path is " << info->path(); currFd_ = open(info->path(), O_WRONLY | O_APPEND); currInfo_ = info; if (currFd_ < 0) { @@ -79,7 +79,7 @@ FileBasedWal::~FileBasedWal() { // moment, there should have no other thread holding this WAL object // Close the last file closeCurrFile(); - LOG(INFO) << idStr_ << "~FileBasedWal, dir = " << dir_; + VLOG(2) << idStr_ << "~FileBasedWal, dir = " << dir_; } void FileBasedWal::scanAllWalFiles() { @@ -90,7 +90,7 @@ void 
FileBasedWal::scanAllWalFiles() { std::vector parts; folly::split('.', fn, parts); if (parts.size() != 2) { - LOG(ERROR) << "Ignore unknown file \"" << fn << "\""; + LOG(WARNING) << "Ignore unknown file \"" << fn << "\""; continue; } @@ -98,7 +98,7 @@ void FileBasedWal::scanAllWalFiles() { try { startIdFromName = folly::to(parts[0]); } catch (const std::exception& ex) { - LOG(ERROR) << "Ignore bad file name \"" << fn << "\""; + LOG(WARNING) << "Ignore bad file name \"" << fn << "\""; continue; } @@ -109,7 +109,7 @@ void FileBasedWal::scanAllWalFiles() { // Get the size of the file and the mtime struct stat st; if (lstat(info->path(), &st) < 0) { - LOG(ERROR) << "Failed to get the size and mtime for \"" << fn << "\", ignore it"; + LOG(WARNING) << "Failed to get the size and mtime for \"" << fn << "\", ignore it"; continue; } info->setSize(st.st_size); @@ -126,58 +126,58 @@ void FileBasedWal::scanAllWalFiles() { // Open the file int32_t fd = open(info->path(), O_RDONLY); if (fd < 0) { - LOG(ERROR) << "Failed to open the file \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to open the file \"" << fn << "\" (" << errno + << "): " << strerror(errno); continue; } // Read the first log id LogID firstLogId = -1; if (read(fd, &firstLogId, sizeof(LogID)) != sizeof(LogID)) { - LOG(ERROR) << "Failed to read the first log id from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to read the first log id from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } if (firstLogId != startIdFromName) { - LOG(ERROR) << "The first log id " << firstLogId << " does not match the file name \"" << fn - << "\", ignore it!"; + LOG(WARNING) << "The first log id " << firstLogId << " does not match the file name \"" << fn + << "\", ignore it!"; close(fd); continue; } // Read the last log length if (lseek(fd, -sizeof(int32_t), SEEK_END) < 0) { - LOG(ERROR) << "Failed to seek the last log 
length from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to seek the last log length from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } int32_t succMsgLen; if (read(fd, &succMsgLen, sizeof(int32_t)) != sizeof(int32_t)) { - LOG(ERROR) << "Failed to read the last log length from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to read the last log length from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } // Verify the last log length if (lseek(fd, -(sizeof(int32_t) * 2 + succMsgLen + sizeof(ClusterID)), SEEK_END) < 0) { - LOG(ERROR) << "Failed to seek the last log length from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to seek the last log length from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } int32_t precMsgLen; if (read(fd, &precMsgLen, sizeof(int32_t)) != sizeof(int32_t)) { - LOG(ERROR) << "Failed to read the last log length from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to read the last log length from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } if (precMsgLen != succMsgLen) { - LOG(ERROR) << "It seems the wal file \"" << fn << "\" is corrupted. Ignore it"; + LOG(WARNING) << "It seems the wal file \"" << fn << "\" is corrupted. 
Ignore it"; // TODO We might want to fix it as much as possible close(fd); continue; @@ -187,15 +187,15 @@ void FileBasedWal::scanAllWalFiles() { if (lseek(fd, -(sizeof(int32_t) * 2 + succMsgLen + sizeof(ClusterID) + sizeof(TermID)), SEEK_END) < 0) { - LOG(ERROR) << "Failed to seek the last log term from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to seek the last log term from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } TermID term = -1; if (read(fd, &term, sizeof(TermID)) != sizeof(TermID)) { - LOG(ERROR) << "Failed to read the last log term from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to read the last log term from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } @@ -206,15 +206,15 @@ void FileBasedWal::scanAllWalFiles() { -(sizeof(int32_t) * 2 + succMsgLen + sizeof(ClusterID) + sizeof(TermID) + sizeof(LogID)), SEEK_END) < 0) { - LOG(ERROR) << "Failed to seek the last log id from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to seek the last log id from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } LogID lastLogId = -1; if (read(fd, &lastLogId, sizeof(LogID)) != sizeof(LogID)) { - LOG(ERROR) << "Failed to read the last log id from \"" << fn << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to read the last log id from \"" << fn << "\" (" << errno + << "): " << strerror(errno); close(fd); continue; } @@ -242,8 +242,8 @@ void FileBasedWal::scanAllWalFiles() { for (++it; it != walFiles_.end(); ++it) { if (it->second->firstId() > prevLastId + 1) { // Found a gap - LOG(ERROR) << "Found a log id gap before " << it->second->firstId() - << ", the previous log id is " << prevLastId; + LOG(WARNING) << "Found a log id gap before " << it->second->firstId() + << ", the previous log id is " << prevLastId; 
logIdAfterLastGap = it->second->firstId(); } prevLastId = it->second->lastId(); @@ -252,7 +252,7 @@ void FileBasedWal::scanAllWalFiles() { // Found gap, remove all logs before the last gap it = walFiles_.begin(); while (it->second->firstId() < logIdAfterLastGap) { - LOG(INFO) << "Removing the wal file \"" << it->second->path() << "\""; + LOG(WARNING) << "Removing the wal file \"" << it->second->path() << "\""; unlink(it->second->path()); it = walFiles_.erase(it); } @@ -284,7 +284,7 @@ void FileBasedWal::closeCurrFile() { struct utimbuf timebuf; timebuf.modtime = currInfo_->mtime(); timebuf.actime = currInfo_->mtime(); - VLOG(1) << "Close cur file " << currInfo_->path() << ", mtime: " << currInfo_->mtime(); + VLOG(4) << "Close cur file " << currInfo_->path() << ", mtime: " << currInfo_->mtime(); utime(currInfo_->path(), &timebuf); currInfo_.reset(); } @@ -295,7 +295,7 @@ void FileBasedWal::prepareNewFile(LogID startLogId) { // Prepare the last entry in walFiles_ WalFileInfoPtr info = std::make_shared( FileUtils::joinPath(dir_, folly::stringPrintf("%019ld.wal", startLogId)), startLogId); - VLOG(1) << idStr_ << "Write new file " << info->path(); + VLOG(4) << idStr_ << "Write new file " << info->path(); walFiles_.emplace(std::make_pair(startLogId, info)); // Create the file for write @@ -356,7 +356,7 @@ void FileBasedWal::rollbackInFile(WalFileInfoPtr info, LogID logId) { CHECK_GT(pos, 0) << "This wal should have been deleted"; if (pos < FileUtils::fileSize(path)) { - LOG(INFO) << idStr_ << "Need to truncate from offset " << pos; + VLOG(4) << idStr_ << "Need to truncate from offset " << pos; if (ftruncate(fd, pos) < 0) { LOG(FATAL) << "Failed to truncate file \"" << path << "\" (errno: " << errno << "): " << strerror(errno); @@ -389,7 +389,7 @@ void FileBasedWal::scanLastWal(WalFileInfoPtr info, LogID firstId) { } if (id != curLogId) { - LOG(ERROR) << "LogId is not consistent" << id << " " << curLogId; + LOG(WARNING) << "LogId is not consistent" << id << " " << 
curLogId; break; } @@ -413,7 +413,7 @@ void FileBasedWal::scanLastWal(WalFileInfoPtr info, LogID firstId) { } if (head != foot) { - LOG(ERROR) << "Message size doesn't match: " << head << " != " << foot; + LOG(WARNING) << "Message size doesn't match: " << head << " != " << foot; break; } @@ -440,19 +440,14 @@ void FileBasedWal::scanLastWal(WalFileInfoPtr info, LogID firstId) { } bool FileBasedWal::appendLogInternal(LogID id, TermID term, ClusterID cluster, std::string msg) { - if (stopped_) { - LOG(ERROR) << idStr_ << "WAL has stopped. Do not accept logs any more"; - return false; - } - if (lastLogId_ != 0 && firstLogId_ != 0 && id != lastLogId_ + 1) { - LOG(ERROR) << idStr_ << "There is a gap in the log id. The last log id is " << lastLogId_ - << ", and the id being appended is " << id; + VLOG(3) << idStr_ << "There is a gap in the log id. The last log id is " << lastLogId_ + << ", and the id being appended is " << id; return false; } if (!preProcessor_(id, term, cluster, msg)) { - LOG(ERROR) << idStr_ << "Pre process failed for log " << id; + VLOG(3) << idStr_ << "Pre process failed for log " << id; return false; } @@ -504,11 +499,11 @@ bool FileBasedWal::appendLogInternal(LogID id, TermID term, ClusterID cluster, s bool FileBasedWal::appendLog(LogID id, TermID term, ClusterID cluster, std::string msg) { if (diskMan_ && !diskMan_->hasEnoughSpace(spaceId_, partId_)) { - LOG_EVERY_N(WARNING, 100) << idStr_ << "Failed to appendLogs because of no more space"; + VLOG_EVERY_N(2, 1000) << idStr_ << "Failed to appendLogs because of no more space"; return false; } if (!appendLogInternal(id, term, cluster, std::move(msg))) { - LOG(ERROR) << "Failed to append log for logId " << id; + VLOG(3) << "Failed to append log for logId " << id; return false; } return true; @@ -516,13 +511,13 @@ bool FileBasedWal::appendLog(LogID id, TermID term, ClusterID cluster, std::stri bool FileBasedWal::appendLogs(LogIterator& iter) { if (diskMan_ && !diskMan_->hasEnoughSpace(spaceId_, 
partId_)) { - LOG_EVERY_N(WARNING, 100) << idStr_ << "Failed to appendLogs because of no more space"; + VLOG_EVERY_N(2, 1000) << idStr_ << "Failed to appendLogs because of no more space"; return false; } for (; iter.valid(); ++iter) { if (!appendLogInternal( iter.logId(), iter.logTerm(), iter.logSource(), iter.logMsg().toString())) { - LOG(ERROR) << idStr_ << "Failed to append log for logId " << iter.logId(); + VLOG(3) << idStr_ << "Failed to append log for logId " << iter.logId(); return false; } } @@ -542,15 +537,15 @@ bool FileBasedWal::linkCurrentWAL(const char* newPath) { closeCurrFile(); std::lock_guard g(walFilesMutex_); if (walFiles_.empty()) { - LOG(INFO) << idStr_ << "No wal files found, skip link"; + VLOG(3) << idStr_ << "No wal files found, skip link"; return true; } if (fs::FileUtils::exist(newPath) && !fs::FileUtils::remove(newPath, true)) { - LOG(ERROR) << "Remove exist dir failed of wal : " << newPath; + VLOG(3) << "Remove exist dir failed of wal : " << newPath; return false; } if (!fs::FileUtils::makeDir(newPath)) { - LOG(INFO) << idStr_ << "Link file parent dir make failed : " << newPath; + VLOG(3) << idStr_ << "Link file parent dir make failed : " << newPath; return false; } @@ -559,11 +554,11 @@ bool FileBasedWal::linkCurrentWAL(const char* newPath) { auto targetFile = fs::FileUtils::joinPath(newPath, folly::stringPrintf("%019ld.wal", f.first)); if (link(f.second->path(), targetFile.data()) != 0) { - LOG(INFO) << idStr_ << "Create link failed for " << f.second->path() << " on " << newPath - << ", error:" << strerror(errno); + VLOG(3) << idStr_ << "Create link failed for " << f.second->path() << " on " << newPath + << ", error:" << strerror(errno); return false; } - LOG(INFO) << idStr_ << "Create link success for " << f.second->path() << " on " << newPath; + VLOG(3) << idStr_ << "Create link success for " << f.second->path() << " on " << newPath; } return true; @@ -571,8 +566,8 @@ bool FileBasedWal::linkCurrentWAL(const char* newPath) { bool 
FileBasedWal::rollbackToLog(LogID id) { if (id < firstLogId_ - 1 || id > lastLogId_) { - LOG(ERROR) << idStr_ << "Rollback target id " << id << " is not in the range of [" - << firstLogId_ << "," << lastLogId_ << "] of WAL"; + VLOG(4) << idStr_ << "Rollback target id " << id << " is not in the range of [" << firstLogId_ + << "," << lastLogId_ << "] of WAL"; return false; } @@ -593,7 +588,7 @@ bool FileBasedWal::rollbackToLog(LogID id) { // are rolled back while (it != walFiles_.end()) { // Need to remove the file - VLOG(1) << "Removing file " << it->second->path(); + VLOG(4) << "Removing file " << it->second->path(); unlink(it->second->path()); it = walFiles_.erase(it); } @@ -606,7 +601,7 @@ bool FileBasedWal::rollbackToLog(LogID id) { lastLogId_ = 0; lastLogTerm_ = 0; } else { - VLOG(1) << "Roll back to log " << id << ", the last WAL file is now \"" + VLOG(4) << "Roll back to log " << id << ", the last WAL file is now \"" << walFiles_.rbegin()->second->path() << "\""; rollbackInFile(walFiles_.rbegin()->second, id); CHECK_EQ(lastLogId_, id); @@ -632,7 +627,7 @@ bool FileBasedWal::reset() { std::vector files = FileUtils::listAllFilesInDir(dir_.c_str(), false, "*.wal"); for (auto& fn : files) { auto absFn = FileUtils::joinPath(dir_, fn); - VLOG(1) << "Removing " << absFn; + VLOG(3) << "Removing " << absFn; unlink(absFn.c_str()); } lastLogId_ = firstLogId_ = 0; @@ -660,7 +655,7 @@ void FileBasedWal::cleanWAL() { while (it != walFiles_.end()) { // keep at least two wal if (index++ < size - 2 && (now - it->second->mtime() > walTTL)) { - VLOG(1) << "Clean wals, Remove " << it->second->path() << ", now: " << now + VLOG(3) << "Clean wals, Remove " << it->second->path() << ", now: " << now << ", mtime: " << it->second->mtime(); unlink(it->second->path()); it = walFiles_.erase(it); @@ -670,7 +665,7 @@ void FileBasedWal::cleanWAL() { } } if (count > 0) { - LOG(INFO) << idStr_ << "Clean wals number " << count; + VLOG(2) << idStr_ << "Clean wals number " << count; } firstLogId_ 
= walFiles_.begin()->second->firstId(); } @@ -682,8 +677,8 @@ void FileBasedWal::cleanWAL(LogID id) { } if (walFiles_.rbegin()->second->lastId() < id) { - LOG(WARNING) << "Try to clean wal not existed " << id << ", lastWal is " - << walFiles_.rbegin()->second->lastId(); + VLOG(3) << "Try to clean wal not existed " << id << ", lastWal is " + << walFiles_.rbegin()->second->lastId(); return; } @@ -691,7 +686,7 @@ void FileBasedWal::cleanWAL(LogID id) { auto iter = walFiles_.begin(); while (iter != walFiles_.end()) { if (iter->second->lastId() < id) { - VLOG(1) << "Clean wals, Remove " << iter->second->path(); + VLOG(3) << "Clean wals, Remove " << iter->second->path(); unlink(iter->second->path()); iter = walFiles_.erase(iter); } else { diff --git a/src/kvstore/wal/FileBasedWal.h b/src/kvstore/wal/FileBasedWal.h index 881edfc91fb..54f3fe65af1 100644 --- a/src/kvstore/wal/FileBasedWal.h +++ b/src/kvstore/wal/FileBasedWal.h @@ -50,6 +50,16 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this + */ static std::shared_ptr getWal( const folly::StringPiece dir, FileBasedWalInfo info, @@ -57,73 +67,114 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this diskMan = nullptr); + /** + * @brief Destroy the file based wal + */ virtual ~FileBasedWal(); - // Signal all WAL holders to stop using this WAL - void stop() { - stopped_ = true; - } - bool isStopped() const { - return stopped_.load(); - } - - // Return the ID of the first log message in the WAL + /** + * @brief Return the ID of the first log message in the WAL + */ LogID firstLogId() const override { return firstLogId_; } - // Return the ID of the last log message in the WAL + /** + * @brief Return the ID of the last log message in the WAL + */ LogID lastLogId() const override { return lastLogId_; } - // Return the term when the the last log is received + /** + * @brief Return the term of the last log message in the WAL + */ TermID lastLogTerm() const override { return 
lastLogTerm_; } - // Return the term of specified logId, if not exist,return -1 + /** + * @brief Return the term of specified logId, if not exist, return -1 + */ TermID getLogTerm(LogID id) override; - // Append one log messages to the WAL - // This method **IS NOT** thread-safe - // we **DO NOT** expect multiple threads will append logs simultaneously + /** + * @brief Append one log message to the WAL. This method **IS NOT** thread-safe. We **DO NOT** + * expect multiple threads will append logs simultaneously + * + * @param id Log id to append + * @param term Log term to append + * @param cluster Cluster id in log to append + * @param msg Log message to append + * @return Whether append succeed + */ bool appendLog(LogID id, TermID term, ClusterID cluster, std::string msg) override; - // Append a list of log messages to the WAL - // This method **IS NOT** thread-safe - // we **DO NOT** expect multiple threads will append logs - // simultaneously + // + /** + * @brief Append a list of log messages to the WAL. This method **IS NOT** thread-safe. We **DO + * NOT** expect multiple threads will append logs simultaneously + * + * @param iter Log iterator to append + * @return Whether append succeed + */ bool appendLogs(LogIterator& iter) override; - // Rollback to the given ID, all logs after the ID will be discarded - // This method **IS NOT** thread-safe - // we **EXPECT** the thread rolling back logs is the same one - // appending logs + /** + * @brief Rollback to the given ID, all logs after the ID will be discarded. This method **IS + * NOT** thread-safe. We **EXPECT** the thread rolling back logs is the same one appending logs + * + * @param id The log id to rollback + * @return Whether rollback succeed + */ bool rollbackToLog(LogID id) override; - // Reset the WAL - // This method is *NOT* thread safe + /** + * @brief Reset the WAL.
This method is *NOT* thread safe + * + * @return Whether reset succeed + */ bool reset() override; + /** + * @brief Clean time expired wal by ttl + */ void cleanWAL() override; + /** + * @brief Clean wal by given log id + */ void cleanWAL(LogID id) override; - // Scan [firstLogId, lastLogId] - // This method IS thread-safe + /** + * @brief Scan the wal in range [firstLogId, lastLogId]. This method is thread-safe + * + * @param firstLogId Start log id, inclusive + * @param lastLogId End log id, inclusive + * @return std::unique_ptr + */ std::unique_ptr iterator(LogID firstLogId, LogID lastLogId) override; - /** It is not thread-safe */ + /** + * @brief Hard link the wal files to a new path + * + * @param newPath New wal path + * @return Whether link succeed + */ bool linkCurrentWAL(const char* newPath) override; - // Iterates through all wal file info in reversed order - // (from the latest to the earliest) - // The iteration finishes when the functor returns false or reaches - // the end - // The method returns the number of wal file info being accessed + /** + * @brief Iterates through all wal file info in reversed order (from the latest to the earliest). + * The iteration finishes when the functor returns false or reaches the end. 
+ * + * @param fn The function to process wal info + * @return size_t The num of wal file info being accessed + */ size_t accessAllWalInfo(std::function fn) const; + /** + * @brief Return the log buffer in memory + */ std::shared_ptr buffer() { return logBuffer_; } @@ -134,27 +185,64 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this diskMan); - // Scan all WAL files + /** + * @brief Scan all WAL files + */ void scanAllWalFiles(); + /** + * @brief Scan the last wal file by each wal log + * + * @param info Wal file info + * @param firstId The first log id in the last wal file + */ void scanLastWal(WalFileInfoPtr info, LogID firstId); - // Close down the current wal file + /** + * @brief Close down the current wal file + */ void closeCurrFile(); - // Prepare a new wal file starting from the given log id + + /** + * @brief Prepare a new wal file starting from the given log id + * + * @param startLogId The first log id of new wal file + */ void prepareNewFile(LogID startLogId); - // Rollback to logId in given file + + /** + * @brief Rollback to logId in given file + * + * @param info The wal file to rollback + * @param logId The wal log id, it should be the last log id in file after rollback + */ void rollbackInFile(WalFileInfoPtr info, LogID logId); - // Implementation of appendLog() + /** + * @brief The actual implementation of appendLog() + * + * @param id Log id to append + * @param term Log term to append + * @param cluster Cluster id in log to append + * @param msg Log message to append + * @return Whether append succeed + */ bool appendLogInternal(LogID id, TermID term, ClusterID cluster, std::string msg); private: @@ -170,8 +258,6 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this stopped_{false}; - const FileBasedWalPolicy policy_; LogID firstLogId_{0}; LogID lastLogId_{0}; diff --git a/src/kvstore/wal/Wal.h b/src/kvstore/wal/Wal.h index debc527e41a..9bfccac08ef 100644 --- a/src/kvstore/wal/Wal.h +++
b/src/kvstore/wal/Wal.h @@ -19,43 +19,86 @@ class Wal { public: virtual ~Wal() = default; - // Return the ID of the first log message in the WAL + /** + * @brief Return the ID of the first log message in the WAL + */ virtual LogID firstLogId() const = 0; - // Return the ID of the last log message in the WAL + /** + * @brief Return the ID of the last log message in the WAL + */ virtual LogID lastLogId() const = 0; - // Return the term to receive the last log + /** + * @brief Return the term of the last log message in the WAL + */ virtual TermID lastLogTerm() const = 0; - // Return the term of specified logId, if not exist, return -1 + /** + * @brief Return the term of specified logId, if not exist, return -1 + */ virtual TermID getLogTerm(LogID id) = 0; - // Append one log message to the WAL + /** + * @brief Append one log message to the WAL + * + * @param id Log id to append + * @param term Log term to append + * @param cluster Cluster id in log to append + * @param msg Log message to append + * @return Whether append succeed + */ virtual bool appendLog(LogID id, TermID term, ClusterID cluster, std::string msg) = 0; // Append a list of log messages to the WAL + /** + * @brief Append a list of log messages to the WAL. + * + * @param iter Log iterator to append + * @return Whether append succeed + */ virtual bool appendLogs(LogIterator& iter) = 0; - // Rollback to the given id, all logs after the id will be discarded + /** + * @brief Rollback to the given id, all logs after the id will be discarded + * + * @param id The log id to rollback + * @return Whether rollback succeed + */ virtual bool rollbackToLog(LogID id) = 0; /** - * Create hard link for current wal on the new path. - * */ + * @brief Hard link the wal files to a new path + * + * @param newPath New wal path + * @return Whether link succeed + */ virtual bool linkCurrentWAL(const char* newPath) = 0; - // Clean all wal files - // This method is *NOT* thread safe + /** + * @brief Reset the WAL.
This method is *NOT* thread safe + * + * @return Whether reset succeed + */ virtual bool reset() = 0; - // clean time expired wal of wal_ttl + /** + * @brief Clean time expired wal by ttl + */ virtual void cleanWAL() = 0; - // clean the wal before given log id + /** + * @brief Clean wal by given log id + */ virtual void cleanWAL(LogID id) = 0; - // Scan [firstLogId, lastLogId] + /** + * @brief Scan the wal in range [firstLogId, lastLogId]. + * + * @param firstLogId Start log id, inclusive + * @param lastLogId End log id, inclusive + * @return std::unique_ptr + */ virtual std::unique_ptr iterator(LogID firstLogId, LogID lastLogId) = 0; }; diff --git a/src/kvstore/wal/WalFileInfo.h b/src/kvstore/wal/WalFileInfo.h index 3fcd048e5f4..685007dba3f 100644 --- a/src/kvstore/wal/WalFileInfo.h +++ b/src/kvstore/wal/WalFileInfo.h @@ -13,8 +13,17 @@ namespace nebula { namespace wal { +/** + * @brief File info of wal file + */ class WalFileInfo final { public: + /** + * @brief Construct a new wal file info + * + * @param path Wal path + * @param firstId First log in wal file + */ WalFileInfo(std::string path, LogID firstId) : fullpath_(std::move(path)), firstLogId_(firstId), @@ -23,38 +32,88 @@ class WalFileInfo final { mtime_(0), size_(0) {} + /** + * @brief The wal file full path + * + * @return const char* + */ const char* path() const { return fullpath_.c_str(); } + /** + * @brief Return first log id of wal file + */ LogID firstId() const { return firstLogId_; } + /** + * @brief Return last log id of wal file + */ LogID lastId() const { return lastLogId_; } + + /** + * @brief Set the last log id + * + * @param id + */ void setLastId(LogID id) { lastLogId_ = id; } + /** + * @brief Return last log term of wal file + * + * @return TermID + */ TermID lastTerm() const { return lastLogTerm_; } + + /** + * @brief Set the last log term + * + * @param term + */ void setLastTerm(TermID term) { lastLogTerm_ = term; } + /** + * @brief Get the last modify time of wal file + * + * 
@return time_t + */ time_t mtime() const { return mtime_; } + + /** + * @brief Set the last modify time of wal file + * + * @param time UTC time + */ void setMTime(time_t time) { mtime_ = time; } + /** + * @brief Get the wal file size + * + * @return size_t File size in bytes + */ size_t size() const { return size_; } + + /** + * @brief Set the wal file size + * + * @param size File size in bytes + */ void setSize(size_t size) { size_ = size; } diff --git a/src/kvstore/wal/WalFileIterator.cpp b/src/kvstore/wal/WalFileIterator.cpp index c390dbb5e75..8bed62d8f0a 100644 --- a/src/kvstore/wal/WalFileIterator.cpp +++ b/src/kvstore/wal/WalFileIterator.cpp @@ -15,13 +15,13 @@ namespace wal { WalFileIterator::WalFileIterator(std::shared_ptr wal, LogID startId, LogID lastId) : wal_(wal), lastId_(lastId), currId_(startId) { if (currId_ > lastId_) { - LOG(ERROR) << wal_->idStr_ << "The log " << currId_ << " is out of range, the lastLogId is " - << lastId_; + VLOG(3) << wal_->idStr_ << "The log " << currId_ << " is out of range, the lastLogId is " + << lastId_; return; } if (startId < wal_->firstLogId()) { - VLOG(1) << wal_->idStr_ << "The given log id " << startId + VLOG(3) << wal_->idStr_ << "The given log id " << startId << " is out of the range, the wal firstLogId is " << wal_->firstLogId(); currId_ = lastId_ + 1; return; @@ -31,8 +31,8 @@ WalFileIterator::WalFileIterator(std::shared_ptr wal, LogID startI wal_->accessAllWalInfo([this](WalFileInfoPtr info) { int fd = open(info->path(), O_RDONLY); if (fd < 0) { - LOG(ERROR) << "Failed to open wal file \"" << info->path() << "\" (" << errno - << "): " << strerror(errno); + LOG(WARNING) << "Failed to open wal file \"" << info->path() << "\" (" << errno + << "): " << strerror(errno); currId_ = lastId_ + 1; return false; } @@ -48,7 +48,7 @@ WalFileIterator::WalFileIterator(std::shared_ptr wal, LogID startI }); if (idRanges_.empty() || idRanges_.front().first > currId_) { - LOG(ERROR) << "LogID " << currId_ << " is out of the wal 
files range"; + VLOG(3) << "LogID " << currId_ << " is out of the wal files range"; currId_ = lastId_ + 1; return; } @@ -108,7 +108,7 @@ LogIterator& WalFileIterator::operator++() { ++currId_; if (currId_ >= nextFirstId_) { // Need to roll over to next file - VLOG(2) << "Current ID is " << currId_ << ", and the first ID in the next file is " + VLOG(4) << "Current ID is " << currId_ << ", and the first ID in the next file is " << nextFirstId_ << ", so need to move to the next file"; // Close the current file CHECK_EQ(close(fds_.front()), 0); @@ -142,7 +142,7 @@ LogIterator& WalFileIterator::operator++() { // Read the logID if (pread(fd, reinterpret_cast(&logId), sizeof(LogID), currPos_) != static_cast(sizeof(LogID))) { - LOG(WARNING) << "Failed to read logId currPos = " << currPos_; + VLOG(3) << "Failed to read logId currPos = " << currPos_; eof_ = true; break; } @@ -151,7 +151,7 @@ LogIterator& WalFileIterator::operator++() { if (pread( fd, reinterpret_cast(&currTerm_), sizeof(TermID), currPos_ + sizeof(LogID)) != static_cast(sizeof(TermID))) { - LOG(WARNING) << "Failed to read term currPos = " << currPos_; + VLOG(3) << "Failed to read term currPos = " << currPos_; eof_ = true; break; } @@ -161,7 +161,7 @@ LogIterator& WalFileIterator::operator++() { sizeof(int32_t), currPos_ + sizeof(TermID) + sizeof(LogID)) != static_cast(sizeof(int32_t))) { - LOG(WARNING) << "Failed to read log length currPos = " << currPos_; + VLOG(3) << "Failed to read log length currPos = " << currPos_; eof_ = true; break; } diff --git a/src/kvstore/wal/WalFileIterator.h b/src/kvstore/wal/WalFileIterator.h index 2fb032bc53d..def4496b31b 100644 --- a/src/kvstore/wal/WalFileIterator.h +++ b/src/kvstore/wal/WalFileIterator.h @@ -14,27 +14,61 @@ namespace wal { class FileBasedWal; +/** + * @brief The log iterator used in AtomicLogBuffer, all logs are in wal file. 
+ */ class WalFileIterator final : public LogIterator { public: - // The range is [startId, lastId] - // if the lastId < 0, the wal_->lastId_ will be used + /** + * @brief Construct a new wal iterator in range [startId, lastId] + * + * @param wal Related wal file + * @param startId Start log id, inclusive + * @param lastId End log id, inclusive + */ WalFileIterator(std::shared_ptr wal, LogID startId, LogID lastId = -1); + /** + * @brief Destroy the wal file iterator + */ virtual ~WalFileIterator(); + /** + * @brief Move forward iterator to next wal record + * + * @return LogIterator& + */ LogIterator& operator++() override; + /** + * @brief Return whether log iterator is valid + */ bool valid() const override; + /** + * @brief Return the log id pointed by current iterator + */ LogID logId() const override; + /** + * @brief Return the log term pointed by current iterator + */ TermID logTerm() const override; + /** + * @brief Return the log source pointed by current iterator + */ ClusterID logSource() const override; + /** + * @brief Return the log message pointed by current iterator + */ folly::StringPiece logMsg() const override; private: + /** + * @brief Return the first log id in next wal file + */ LogID getFirstIdInNextFile() const; private: diff --git a/src/kvstore/wal/test/CMakeLists.txt b/src/kvstore/wal/test/CMakeLists.txt index 6cfd8ee1730..0b1f9700563 100644 --- a/src/kvstore/wal/test/CMakeLists.txt +++ b/src/kvstore/wal/test/CMakeLists.txt @@ -43,8 +43,9 @@ nebula_add_test( SOURCES AtomicLogBufferTest.cpp OBJECTS - $ + ${WAL_TEST_LIBS} LIBRARIES + ${THRIFT_LIBRARIES} gtest ) @@ -55,8 +56,9 @@ nebula_add_executable( LogBufferBenchmark.cpp InMemoryLogBuffer.cpp OBJECTS - $ + ${WAL_TEST_LIBS} LIBRARIES + ${THRIFT_LIBRARIES} follybenchmark boost_regex ) diff --git a/src/kvstore/wal/test/InMemoryLogBuffer.h b/src/kvstore/wal/test/InMemoryLogBuffer.h index 1f927699b8e..8a10363d4af 100644 --- a/src/kvstore/wal/test/InMemoryLogBuffer.h +++
b/src/kvstore/wal/test/InMemoryLogBuffer.h @@ -19,12 +19,12 @@ class InMemoryLogBuffer final { public: explicit InMemoryLogBuffer(LogID firstLogId, const std::string& idStr = "") : firstLogId_(firstLogId), idStr_(idStr) { - VLOG(1) << idStr_ << "InMemoryLogBuffer ctor, firstLogId " << firstLogId_; + VLOG(2) << idStr_ << "InMemoryLogBuffer ctor, firstLogId " << firstLogId_; logs_.reserve(1024); } ~InMemoryLogBuffer() { - VLOG(1) << idStr_ << "InMemoryLogBuffer dtor, firstLogId " << firstLogId_; + VLOG(2) << idStr_ << "InMemoryLogBuffer dtor, firstLogId " << firstLogId_; } // Push a new message to the end of the buffer