Skip to content

Commit

Permalink
Add status metrics, grafana panels about S3 GC (#7174)
Browse files Browse the repository at this point in the history
ref #6827, close #7258
  • Loading branch information
JaySon-Huang authored Apr 10, 2023
1 parent 6549777 commit 1237e95
Show file tree
Hide file tree
Showing 21 changed files with 1,233 additions and 521 deletions.
33 changes: 26 additions & 7 deletions dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ namespace DB
F(type_lock_conflict, {"type", "lock_conflict"}), F(type_delete_conflict, {"type", "delete_conflict"}), \
F(type_delete_risk, {"type", "delete_risk"})) \
M(tiflash_disaggregated_object_lock_request_duration_seconds, "Bucketed histogram of S3 object lock/delete request duration", Histogram, \
F(type_lock, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \
F(type_delete, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20})) \
F(type_lock, {{"type", "lock"}}, ExpBuckets{0.001, 2, 20}), \
F(type_delete, {{"type", "delete"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_disaggregated_read_tasks_count, "Total number of storage engine disaggregated read tasks", Counter) \
M(tiflash_disaggregated_breakdown_duration_seconds, "", Histogram, \
F(type_rpc_establish, {{"type", "rpc_establish"}}, ExpBuckets{0.01, 2, 20}), \
Expand Down Expand Up @@ -319,8 +319,26 @@ namespace DB
M(tiflash_storage_remote_stats, "The file stats on remote store", Gauge, \
F(type_total_size, {"type", "total_size"}), F(type_valid_size, {"type", "valid_size"}), \
F(type_num_files, {"type", "num_files"})) \
M(tiflash_storage_checkpoint_seconds, "PageStorage checkpoint elapsed time", \
Histogram, /* these command usually cost several seconds, increase the start bucket to 50ms */ \
F(type_dump_checkpoint_snapshot, {{"type", "dump_checkpoint_snapshot"}}, ExpBuckets{0.05, 2, 20}), \
F(type_dump_checkpoint_data, {{"type", "dump_checkpoint_data"}}, ExpBuckets{0.05, 2, 20}), \
F(type_upload_checkpoint, {{"type", "upload_checkpoint"}}, ExpBuckets{0.05, 2, 20}), \
F(type_copy_checkpoint_info, {{"type", "copy_checkpoint_info"}}, ExpBuckets{0.05, 2, 20})) \
M(tiflash_storage_checkpoint_flow, "The bytes flow cause by remote checkpoint", Counter, \
F(type_incremental, {"type", "incremental"}), F(type_compaction, {"type", "compaction"})) \
M(tiflash_storage_checkpoint_keys_by_types, "The keys flow cause by remote checkpoint", Counter, \
/* one F() per storage type; every F() must carry its own distinct "type" label value */ \
F(type_raftengine, {"type", "raftengine"}), F(type_kvengine, {"type", "kvengine"}), F(type_kvstore, {"type", "kvstore"}), \
F(type_data, {"type", "data"}), F(type_log, {"type", "log"}), F(type_meta, {"type", "meta"}), \
F(type_unknown, {"type", "unknown"})) \
M(tiflash_storage_checkpoint_flow_by_types, "The bytes flow cause by remote checkpoint", Counter, \
/* one F() per storage type; every F() must carry its own distinct "type" label value */ \
F(type_raftengine, {"type", "raftengine"}), F(type_kvengine, {"type", "kvengine"}), F(type_kvstore, {"type", "kvstore"}), \
F(type_data, {"type", "data"}), F(type_log, {"type", "log"}), F(type_meta, {"type", "meta"}), \
F(type_unknown, {"type", "unknown"})) \
M(tiflash_storage_page_data_by_types, "The existing bytes stored in UniPageStorage", Gauge, \
/* one F() per storage type; every F() must carry its own distinct "type" label value, */ \
/* otherwise two gauges written with Set() would report under the same label */ \
F(type_raftengine, {"type", "raftengine"}), F(type_kvengine, {"type", "kvengine"}), F(type_kvstore, {"type", "kvstore"}), \
F(type_data, {"type", "data"}), F(type_log, {"type", "log"}), F(type_meta, {"type", "meta"}), \
F(type_unknown, {"type", "unknown"})) \
M(tiflash_storage_s3_request_seconds, "S3 request duration in seconds", Histogram, \
F(type_put_object, {{"type", "put_object"}}, ExpBuckets{0.001, 2, 20}), \
F(type_copy_object, {{"type", "copy_object"}}, ExpBuckets{0.001, 2, 20}), \
Expand All @@ -331,6 +349,11 @@ namespace DB
F(type_list_objects, {{"type", "list_objects"}}, ExpBuckets{0.001, 2, 20}), \
F(type_delete_object, {{"type", "delete_object"}}, ExpBuckets{0.001, 2, 20}), \
F(type_head_object, {{"type", "head_object"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_storage_s3_gc_status, "S3 GC status", Gauge, \
F(type_lifecycle_added, {{"type", "lifecycle_added"}}), \
F(type_lifecycle_failed, {{"type", "lifecycle_failed"}}), \
F(type_owner, {{"type", "owner"}}), \
F(type_running, {{"type", "running"}})) \
M(tiflash_storage_s3_gc_seconds, "S3 GC subprocess duration in seconds", \
Histogram, /* these command usually cost several seconds, increase the start bucket to 500ms */ \
F(type_total, {{"type", "total"}}, ExpBuckets{0.5, 2, 20}), \
Expand All @@ -339,11 +362,7 @@ namespace DB
F(type_clean_locks, {{"type", "clean_locks"}}, ExpBuckets{0.5, 2, 20}), \
F(type_clean_manifests, {{"type", "clean_manifests"}}, ExpBuckets{0.5, 2, 20}), \
F(type_scan_then_clean_data_files, {{"type", "scan_then_clean_data_files"}}, ExpBuckets{0.5, 2, 20}), \
F(type_clean_one_lock, {{"type", "clean_one_lock"}}, ExpBuckets{0.5, 2, 20})) \
M(tiflash_storage_checkpoint_seconds, "PageStorage checkpoint elapsed time", Histogram, \
F(type_dump_checkpoint_snapshot, {{"type", "dump_checkpoint_snapshot"}}, ExpBuckets{0.001, 2, 20}), \
F(type_dump_checkpoint_data, {{"type", "dump_checkpoint_data"}}, ExpBuckets{0.001, 2, 20}), \
F(type_upload_checkpoint, {{"type", "upload_checkpoint"}}, ExpBuckets{0.001, 2, 20}))
F(type_clean_one_lock, {{"type", "clean_one_lock"}}, ExpBuckets{0.5, 2, 20}))

// clang-format on

Expand Down
1 change: 1 addition & 0 deletions dbms/src/Flash/Disaggregated/S3LockService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ bool S3LockService::tryMarkDeleteImpl(const String & data_file_key, disaggregate
{
const S3FilenameView key_view = S3FilenameView::fromKey(data_file_key);
RUNTIME_CHECK(key_view.isDataFile(), data_file_key);
GET_METRIC(tiflash_disaggregated_object_lock_request_count, type_delete).Increment();

if (!gc_owner->isOwner())
{
Expand Down
87 changes: 66 additions & 21 deletions dbms/src/Interpreters/AsynchronousMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
#include <Common/Allocator.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/TiFlashMetrics.h>
#include <Common/setThreadName.h>
#include <Common/typeid_cast.h>
#include <Core/TiFlashDisaggregatedMode.h>
#include <Databases/IDatabase.h>
#include <IO/UncompressedCache.h>
#include <Interpreters/AsynchronousMetrics.h>
Expand Down Expand Up @@ -125,27 +127,62 @@ static void calculateMaxAndSum(Max & max, Sum & sum, T x)

FileUsageStatistics AsynchronousMetrics::getPageStorageFileUsage()
{
RUNTIME_ASSERT(!(context.getSharedContextDisagg()->isDisaggregatedComputeMode() && context.getSharedContextDisagg()->use_autoscaler));
// Get from RegionPersister
auto & tmt = context.getTMTContext();
auto & kvstore = tmt.getKVStore();
FileUsageStatistics usage = kvstore->getFileUsageStatistics();

// Get the blob file status from all PS V3 instances
if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr)
FileUsageStatistics usage;
switch (context.getSharedContextDisagg()->disaggregated_mode)
{
const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics();
const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics();
const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics();
case DisaggregatedMode::None:
{
if (auto uni_ps = context.tryGetWriteNodePageStorage(); uni_ps != nullptr)
{
/// When format_version=5 is enabled, then all data are stored in the `uni_ps`
usage.merge(uni_ps->getFileUsageStatistics());
}
else
{
/// When format_version < 5, then there are multiple PageStorage instances

usage.merge(log_usage)
.merge(meta_usage)
.merge(data_usage);
}
// Get from RegionPersister
auto & tmt = context.getTMTContext();
auto & kvstore = tmt.getKVStore();
usage = kvstore->getFileUsageStatistics();

// Get the blob file status from all PS V3 instances
if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr)
{
const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics();
const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics();
const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics();

if (auto ps_cache = context.getSharedContextDisagg()->rn_page_cache_storage; ps_cache != nullptr)
usage.merge(log_usage)
.merge(meta_usage)
.merge(data_usage);
}
}
break;
}
case DisaggregatedMode::Storage:
{
// disagg write node, all data are stored in the `uni_ps`
if (auto uni_ps = context.getWriteNodePageStorage(); uni_ps != nullptr)
{
usage.merge(uni_ps->getFileUsageStatistics());
}
break;
}
case DisaggregatedMode::Compute:
{
usage.merge(ps_cache->getUniversalPageStorage()->getFileUsageStatistics());
// disagg compute node without auto-scaler, the proxy data are stored in the `uni_ps`
if (auto uni_ps = context.getWriteNodePageStorage(); uni_ps != nullptr)
{
usage.merge(uni_ps->getFileUsageStatistics());
}
// disagg compute node, all cache page data are stored in the `ps_cache`
if (auto ps_cache = context.getSharedContextDisagg()->rn_page_cache_storage; ps_cache != nullptr)
{
usage.merge(ps_cache->getUniversalPageStorage()->getFileUsageStatistics());
}
break;
}
}

return usage;
Expand Down Expand Up @@ -206,7 +243,6 @@ void AsynchronousMetrics::update()
set("MaxDTBackgroundTasksLength", max_dt_background_tasks_length);
}

if (!(context.getSharedContextDisagg()->isDisaggregatedComputeMode() && context.getSharedContextDisagg()->use_autoscaler))
{
const FileUsageStatistics usage = getPageStorageFileUsage();
set("BlobFileNums", usage.total_file_num);
Expand All @@ -217,6 +253,15 @@ void AsynchronousMetrics::update()
set("PagesInMem", usage.num_pages);
}

if (context.getSharedContextDisagg()->isDisaggregatedStorageMode())
{
    // Report whether this node currently owns S3 GC. The gauge is written on
    // every update, in both directions, so that it drops back to 0 once the
    // node is no longer the owner instead of keeping a stale 1.
    auto & tmt = context.getTMTContext();
    if (auto s3_gc_owner = tmt.getS3GCOwnerManager(); s3_gc_owner->isOwner())
    {
        GET_METRIC(tiflash_storage_s3_gc_status, type_owner).Set(1.0);
    }
    else
    {
        GET_METRIC(tiflash_storage_s3_gc_status, type_owner).Set(0.0);
    }
}

#if USE_MIMALLOC
#define MI_STATS_SET(X) set("mimalloc." #X, X)

Expand Down Expand Up @@ -256,7 +301,7 @@ void AsynchronousMetrics::update()
M("background_thread.num_runs", uint64_t) \
M("background_thread.run_interval", uint64_t)

#define GET_METRIC(NAME, TYPE) \
#define GET_JEMALLOC_METRIC(NAME, TYPE) \
do \
{ \
TYPE value{}; \
Expand All @@ -265,9 +310,9 @@ void AsynchronousMetrics::update()
set("jemalloc." NAME, value); \
} while (0);

FOR_EACH_METRIC(GET_METRIC);
FOR_EACH_METRIC(GET_JEMALLOC_METRIC);

#undef GET_METRIC
#undef GET_JEMALLOC_METRIC
#undef FOR_EACH_METRIC
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Server/StorageConfigParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct StorageRemoteCacheConfig
String dir;
UInt64 capacity = 0;
UInt64 dtfile_level = 100;
double delta_rate = 0.3;
double delta_rate = 0.1;

bool isCacheEnabled() const;
void initCacheDir() const;
Expand Down
5 changes: 5 additions & 0 deletions dbms/src/Storages/Page/PageConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,15 @@ static constexpr UInt64 GB = MB * 1024;

/// Tags a piece of page data with the kind of storage it belongs to.
/// Each real enumerator carries an explicit numeric value; the values are
/// contiguous starting at 0, so they can be used directly as array indexes.
/// NOTE(review): the explicit values suggest they may be persisted or exchanged
/// somewhere — confirm before renumbering.
enum class StorageType
{
// Value 0: not one of the named kinds below.
Unknown = 0,
Log = 1,
Data = 2,
Meta = 3,
KVStore = 4,
RaftEngine = 5,
KVEngine = 6,

// Sentinel, not a real storage type. It has no explicit value, so it takes the
// value right after the last enumerator above, i.e. the number of real
// enumerators. It must stay the last entry.
_MAX_STORAGE_TYPE_, // NOLINT(bugprone-reserved-identifier)
};

enum class PageStorageRunMode : UInt8
Expand Down
6 changes: 0 additions & 6 deletions dbms/src/Storages/Page/V3/CheckpointFile/CPDataFileStat.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,6 @@
namespace DB::PS::V3
{

struct CPDataWriteStats
{
bool has_new_data = false;
size_t incremental_data_bytes = 0;
size_t compact_data_bytes = 0;
};

using RemoteFileValidSizes = std::unordered_map<String, size_t>;

Expand Down
85 changes: 85 additions & 0 deletions dbms/src/Storages/Page/V3/CheckpointFile/CPDumpStat.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2023 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/TiFlashMetrics.h>
#include <Storages/Page/PageConstants.h>
#include <Storages/Page/V3/CheckpointFile/CPDumpStat.h>
#include <fmt/core.h>

namespace DB::PS::V3
{

void SetMetrics(const CPDataDumpStats & stats)
{
for (size_t i = 0; i < static_cast<size_t>(DB::StorageType::_MAX_STORAGE_TYPE_); ++i)
{
auto type = static_cast<DB::StorageType>(i);
switch (type)
{
case DB::StorageType::Unknown:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_unknown).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_unknown).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_unknown).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::RaftEngine:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_raftengine).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_raftengine).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_raftengine).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::KVEngine:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_kvengine).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_kvengine).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_kvengine).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::KVStore:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_kvstore).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_kvstore).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_kvstore).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::Data:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_data).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_data).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_data).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::Log:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_log).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_log).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_log).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::Meta:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_meta).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_meta).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_meta).Set(stats.num_existing_bytes[i]);
break;
}
default:
__builtin_unreachable();
}
}
}

} // namespace DB::PS::V3
Loading

0 comments on commit 1237e95

Please sign in to comment.