Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

curvefs: add metric for mds topology to update metaserver metric #1177

Merged
merged 1 commit into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions curvefs/src/mds/topology/topology_item.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ class Pool {

// Returns the creation time recorded for this pool (the createTime_ member).
uint64_t GetCreateTime() const {
    return createTime_;
}

void SetDiskThreshold(uint64_t diskCapacity) {
diskCapacity_ = diskCapacity;
void SetDiskThreshold(uint64_t diskThreshold) {
diskCapacity_ = diskThreshold;
}
// Returns the pool's disk threshold. The value is held in diskCapacity_,
// the same member that SetDiskThreshold() writes.
uint64_t GetDiskThreshold() const {
    return diskCapacity_;
}

Expand Down Expand Up @@ -269,8 +269,6 @@ class MetaServerSpace {
memoryThresholdByte_ = status.memorythresholdbyte();
memoryCopySetMinRequireByte_ = status.memorycopysetminrequirebyte();
memoryUsedByte_ = status.memoryusedbyte();
LOG(INFO) << "Receive space status from metaserver: "
<< status.ShortDebugString();
}

double GetResourceUseRatioPercent() {
Expand Down
51 changes: 36 additions & 15 deletions curvefs/src/mds/topology/topology_metric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,38 +46,37 @@ void TopologyMetricService::UpdateTopologyMetrics() {
}
MetaServer ms;
if (topo_->GetMetaServer(msId, &ms)) {
it->second->diskCapacity.set_value(
it->second->diskThreshold.set_value(
ms.GetMetaServerSpace().GetDiskThreshold());
it->second->diskUsed.set_value(
ms.GetMetaServerSpace().GetDiskUsed());
it->second->diskMinRequire.set_value(
ms.GetMetaServerSpace().GetDiskMinRequire());
it->second->memoryThreshold.set_value(
ms.GetMetaServerSpace().GetMemoryThreshold());
it->second->memoryUsed.set_value(
ms.GetMetaServerSpace().GetMemoryUsed());
it->second->memoryMinRequire.set_value(
ms.GetMetaServerSpace().GetMemoryMinRequire());
}
}

// process pool
std::vector<PoolIdType> pools = topo_->GetPoolInCluster();
for (auto pid : pools) {
// prepare pool metrics
Pool pool;
if (!topo_->GetPool(pid, &pool)) {
continue;
}
std::string poolName = pool.GetName();

std::vector<CopySetInfo> copysets = topo_->GetCopySetInfosInPool(pid);

std::map<MetaServerIdType, MetaServerMetricInfo> metaServerMetricInfo;
CalcMetaServerMetrics(copysets, &metaServerMetricInfo);

auto it = gPoolMetrics.find(pid);
if (it == gPoolMetrics.end()) {
PoolMetricPtr lptr(new PoolMetric(poolName));
it = gPoolMetrics.emplace(pid, std::move(lptr)).first;
}

it->second->metaServerNum.set_value(metaServerMetricInfo.size());
it->second->copysetNum.set_value(copysets.size());

// update partitionNum, inodeNum, dentryNum
uint64_t totalInodeNum = 0;
uint64_t totalDentryNum = 0;
std::list<Partition> partitions = topo_->GetPartitionInfosInPool(pid);
Expand All @@ -89,10 +88,13 @@ void TopologyMetricService::UpdateTopologyMetrics() {
it->second->dentryNum.set_value(totalDentryNum);
it->second->partitionNum.set_value(partitions.size());

uint64_t totalDiskCapacity = 0;
uint64_t totalDiskUsed = 0;
// update copyset
std::vector<CopySetInfo> copysets = topo_->GetCopySetInfosInPool(pid);
it->second->copysetNum.set_value(copysets.size());

// process the metric of metaserver.
// process the metric of metaserver
std::map<MetaServerIdType, MetaServerMetricInfo> metaServerMetricInfo;
CalcMetaServerMetrics(copysets, &metaServerMetricInfo);
for (const auto &cm : metaServerMetricInfo) {
auto ix = gMetaServerMetrics.find(cm.first);
if (ix == gMetaServerMetrics.end()) {
Expand All @@ -104,13 +106,32 @@ void TopologyMetricService::UpdateTopologyMetrics() {
ix->second->copysetNum.set_value(cm.second.copysetNum);
ix->second->leaderNum.set_value(cm.second.leaderNum);
ix->second->partitionNum.set_value(cm.second.partitionNum);
}

// update pool resource usage
uint64_t totalDiskThreshold = 0;
uint64_t totalDiskUsed = 0;
uint64_t totalMemoryThreshold = 0;
uint64_t totalMemoryUsed = 0;
auto msIdInPool = topo_->GetMetaServerInPool(pid);
for (auto msId : msIdInPool) {
auto ix = gMetaServerMetrics.find(msId);
if (ix == gMetaServerMetrics.end()) {
MetaServerMetricPtr cptr(new MetaServerMetric(msId));
ix = gMetaServerMetrics.emplace(msId, std::move(cptr)).first;
}

totalDiskCapacity += ix->second->diskCapacity.get_value();
totalDiskThreshold += ix->second->diskThreshold.get_value();
totalDiskUsed += ix->second->diskUsed.get_value();
totalMemoryThreshold += ix->second->memoryThreshold.get_value();
totalMemoryUsed += ix->second->memoryUsed.get_value();
}

it->second->diskCapacity.set_value(totalDiskCapacity);
it->second->metaServerNum.set_value(msIdInPool.size());
it->second->diskThreshold.set_value(totalDiskThreshold);
it->second->diskUsed.set_value(totalDiskUsed);
it->second->memoryThreshold.set_value(totalMemoryThreshold);
it->second->memoryUsed.set_value(totalMemoryUsed);
}

// remove pool metrics that no longer exist
Expand Down
35 changes: 26 additions & 9 deletions curvefs/src/mds/topology/topology_metric.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,18 @@ struct MetaServerMetric {
bvar::Status<uint32_t> copysetNum;
// leader numbers
bvar::Status<uint32_t> leaderNum;
// disk capacity
bvar::Status<uint64_t> diskCapacity;
// disk utilization
// disk threshold, Byte
bvar::Status<uint64_t> diskThreshold;
// disk utilization, Byte
bvar::Status<uint64_t> diskUsed;
// memory utilization
// disk copyset min required, Byte
bvar::Status<uint64_t> diskMinRequire;
// memory threshold, Byte
bvar::Status<uint64_t> memoryThreshold;
// memory utilization, Byte
bvar::Status<uint64_t> memoryUsed;
// memory copyset min required, Byte
bvar::Status<uint64_t> memoryMinRequire;
// partition numbers
bvar::Status<uint32_t> partitionNum;

Expand All @@ -66,12 +72,18 @@ struct MetaServerMetric {
std::to_string(msId) + "_copyset_num", 0),
leaderNum(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_leader_num", 0),
diskCapacity(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_disk_capacity", 0),
diskThreshold(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_disk_threshold", 0),
diskUsed(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_disk_used", 0),
diskMinRequire(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_disk_min_require", 0),
memoryThreshold(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_memory_threshold", 0),
memoryUsed(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_memory_used", 0),
memoryMinRequire(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_memory_min_require", 0),
partitionNum(kTopologyMetaServerMetricPrefix,
std::to_string(msId) + "_partition_num", 0) {}
};
Expand All @@ -82,8 +94,10 @@ struct PoolMetric {
const std::string kTopologyPoolMetricPrefix = "topology_metric_pool_";
bvar::Status<uint32_t> metaServerNum;
bvar::Status<uint32_t> copysetNum;
bvar::Status<uint64_t> diskCapacity;
bvar::Status<uint64_t> diskThreshold;
bvar::Status<uint64_t> diskUsed;
bvar::Status<uint64_t> memoryThreshold;
bvar::Status<uint64_t> memoryUsed;
bvar::Status<uint64_t> inodeNum;
bvar::Status<uint64_t> dentryNum;
bvar::Status<uint64_t> partitionNum;
Expand All @@ -93,9 +107,12 @@ struct PoolMetric {
0),
copysetNum(kTopologyPoolMetricPrefix, poolName + "_copyset_num", 0),

diskCapacity(kTopologyPoolMetricPrefix, poolName + "_disk_capacity",
0),
diskThreshold(kTopologyPoolMetricPrefix, poolName + "_disk_threshold",
0),
diskUsed(kTopologyPoolMetricPrefix, poolName + "_disk_used", 0),
memoryThreshold(kTopologyPoolMetricPrefix,
poolName + "_memory_threshold", 0),
memoryUsed(kTopologyPoolMetricPrefix, poolName + "_memory_used", 0),
inodeNum(kTopologyPoolMetricPrefix, poolName + "_inode_num", 0),
dentryNum(kTopologyPoolMetricPrefix, poolName + "_dentry_num", 0),
partitionNum(kTopologyPoolMetricPrefix, poolName + "_partition_num",
Expand Down
17 changes: 13 additions & 4 deletions curvefs/src/metaserver/heartbeat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,19 @@ void Heartbeat::DumpHeartbeatRequest(const HeartbeatRequest& request) {
VLOG(6) << "Heartbeat request: Metaserver ID: " << request.metaserverid()
<< ", IP = " << request.ip() << ", port = " << request.port()
<< ", copyset count = " << request.copysetcount()
<< ", leader count = " << request.leadercount();
// << ", metadataSpaceTotal = " << request.metadataspacetotal()
// << " KB, metadataSpaceUsed = " << request.metadataspaceused()
// << " KB, memoryUsed = " << request.memoryused() << " KB";
<< ", leader count = " << request.leadercount()
<< ", diskThresholdByte = "
<< request.spacestatus().diskthresholdbyte()
<< ", diskCopysetMinRequireByte = "
<< request.spacestatus().diskcopysetminrequirebyte()
<< ", diskUsedByte = "
<< request.spacestatus().diskusedbyte()
<< ", memoryThresholdByte = "
<< request.spacestatus().memorythresholdbyte()
<< ", memoryCopySetMinRequireByte = "
<< request.spacestatus().memorycopysetminrequirebyte()
<< ", memoryUsedByte = " << request.spacestatus().memoryusedbyte();

for (int i = 0; i < request.copysetinfos_size(); i++) {
const curvefs::mds::heartbeat::CopySetInfo &info =
request.copysetinfos(i);
Expand Down
4 changes: 3 additions & 1 deletion curvefs/src/metaserver/storage/memory_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ bool MemoryStorage::GetStatistics(StorageStatistics* statistics) {
if (!GetProcMemory(&vmRSS)) {
return false;
}
statistics->memoryUsageBytes = vmRSS;

// vmRSS is reported in KB; convert it to bytes
statistics->memoryUsageBytes = vmRSS * 1024;

// disk usage bytes
uint64_t total, available;
Expand Down
8 changes: 4 additions & 4 deletions curvefs/test/mds/topology/test_topology_metric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,31 +178,31 @@ TEST_F(TestTopologyMetric, TestUpdateTopologyMetricsOnePool) {
ASSERT_EQ(2, gMetaServerMetrics[0x41]->scatterWidth.get_value());
ASSERT_EQ(1, gMetaServerMetrics[0x41]->copysetNum.get_value());
ASSERT_EQ(0, gMetaServerMetrics[0x41]->leaderNum.get_value());
ASSERT_EQ(100 * 1024, gMetaServerMetrics[0x41]->diskCapacity.get_value());
ASSERT_EQ(100 * 1024, gMetaServerMetrics[0x41]->diskThreshold.get_value());
ASSERT_EQ(10 * 1024, gMetaServerMetrics[0x41]->diskUsed.get_value());
ASSERT_EQ(20 * 1024, gMetaServerMetrics[0x41]->memoryUsed.get_value());
ASSERT_EQ(3, gMetaServerMetrics[0x41]->partitionNum.get_value());

ASSERT_EQ(2, gMetaServerMetrics[0x42]->scatterWidth.get_value());
ASSERT_EQ(1, gMetaServerMetrics[0x42]->copysetNum.get_value());
ASSERT_EQ(0, gMetaServerMetrics[0x42]->leaderNum.get_value());
ASSERT_EQ(100 * 1024, gMetaServerMetrics[0x42]->diskCapacity.get_value());
ASSERT_EQ(100 * 1024, gMetaServerMetrics[0x42]->diskThreshold.get_value());
ASSERT_EQ(10 * 1024, gMetaServerMetrics[0x42]->diskUsed.get_value());
ASSERT_EQ(20 * 1024, gMetaServerMetrics[0x42]->memoryUsed.get_value());
ASSERT_EQ(3, gMetaServerMetrics[0x42]->partitionNum.get_value());

ASSERT_EQ(2, gMetaServerMetrics[0x43]->scatterWidth.get_value());
ASSERT_EQ(1, gMetaServerMetrics[0x43]->copysetNum.get_value());
ASSERT_EQ(0, gMetaServerMetrics[0x43]->leaderNum.get_value());
ASSERT_EQ(100 * 1024, gMetaServerMetrics[0x43]->diskCapacity.get_value());
ASSERT_EQ(100 * 1024, gMetaServerMetrics[0x43]->diskThreshold.get_value());
ASSERT_EQ(10 * 1024, gMetaServerMetrics[0x43]->diskUsed.get_value());
ASSERT_EQ(20 * 1024, gMetaServerMetrics[0x43]->memoryUsed.get_value());
ASSERT_EQ(3, gMetaServerMetrics[0x43]->partitionNum.get_value());

ASSERT_EQ(1, gPoolMetrics.size());
ASSERT_EQ(3, gPoolMetrics[poolId]->metaServerNum.get_value());
ASSERT_EQ(1, gPoolMetrics[poolId]->copysetNum.get_value());
ASSERT_EQ(100 * 1024 * 3, gPoolMetrics[poolId]->diskCapacity.get_value());
ASSERT_EQ(100 * 1024 * 3, gPoolMetrics[poolId]->diskThreshold.get_value());
ASSERT_EQ(10 * 1024 * 3, gPoolMetrics[poolId]->diskUsed.get_value());
ASSERT_EQ(30, gPoolMetrics[poolId]->inodeNum.get_value());
ASSERT_EQ(300, gPoolMetrics[poolId]->dentryNum.get_value());
Expand Down