Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add status metrics, grafana panels about S3 GC #7174

Merged
merged 12 commits into from
Apr 10, 2023
33 changes: 26 additions & 7 deletions dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ namespace DB
F(type_lock_conflict, {"type", "lock_conflict"}), F(type_delete_conflict, {"type", "delete_conflict"}), \
F(type_delete_risk, {"type", "delete_risk"})) \
M(tiflash_disaggregated_object_lock_request_duration_seconds, "Bucketed histogram of S3 object lock/delete request duration", Histogram, \
F(type_lock, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \
F(type_delete, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20})) \
F(type_lock, {{"type", "lock"}}, ExpBuckets{0.001, 2, 20}), \
F(type_delete, {{"type", "delete"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_disaggregated_read_tasks_count, "Total number of storage engine disaggregated read tasks", Counter) \
M(tiflash_disaggregated_breakdown_duration_seconds, "", Histogram, \
F(type_rpc_establish, {{"type", "rpc_establish"}}, ExpBuckets{0.01, 2, 20}), \
Expand Down Expand Up @@ -319,8 +319,26 @@ namespace DB
M(tiflash_storage_remote_stats, "The file stats on remote store", Gauge, \
F(type_total_size, {"type", "total_size"}), F(type_valid_size, {"type", "valid_size"}), \
F(type_num_files, {"type", "num_files"})) \
M(tiflash_storage_checkpoint_seconds, "PageStorage checkpoint elapsed time", \
Histogram, /* these command usually cost several seconds, increase the start bucket to 50ms */ \
F(type_dump_checkpoint_snapshot, {{"type", "dump_checkpoint_snapshot"}}, ExpBuckets{0.05, 2, 20}), \
F(type_dump_checkpoint_data, {{"type", "dump_checkpoint_data"}}, ExpBuckets{0.05, 2, 20}), \
F(type_upload_checkpoint, {{"type", "upload_checkpoint"}}, ExpBuckets{0.05, 2, 20}), \
F(type_copy_checkpoint_info, {{"type", "copy_checkpoint_info"}}, ExpBuckets{0.05, 2, 20})) \
M(tiflash_storage_checkpoint_flow, "The bytes flow cause by remote checkpoint", Counter, \
F(type_incremental, {"type", "incremental"}), F(type_compaction, {"type", "compaction"})) \
M(tiflash_storage_checkpoint_keys_by_types, "The keys flow cause by remote checkpoint", \
Counter, /* one series per StorageType; every F() must carry its own distinct "type" label value */ \
F(type_raftengine, {"type", "raftengine"}), F(type_kvengine, {"type", "kvengine"}), F(type_kvstore, {"type", "kvstore"}), \
F(type_data, {"type", "data"}), F(type_log, {"type", "log"}), F(type_meta, {"type", "meta"}), \
F(type_unknown, {"type", "unknown"})) \
M(tiflash_storage_checkpoint_flow_by_types, "The bytes flow cause by remote checkpoint", \
Counter, /* one series per StorageType; every F() must carry its own distinct "type" label value */ \
F(type_raftengine, {"type", "raftengine"}), F(type_kvengine, {"type", "kvengine"}), F(type_kvstore, {"type", "kvstore"}), \
F(type_data, {"type", "data"}), F(type_log, {"type", "log"}), F(type_meta, {"type", "meta"}), \
F(type_unknown, {"type", "unknown"})) \
M(tiflash_storage_page_data_by_types, "The existing bytes stored in UniPageStorage", \
Gauge, /* one series per StorageType; every F() must carry its own distinct "type" label value */ \
F(type_raftengine, {"type", "raftengine"}), F(type_kvengine, {"type", "kvengine"}), F(type_kvstore, {"type", "kvstore"}), \
F(type_data, {"type", "data"}), F(type_log, {"type", "log"}), F(type_meta, {"type", "meta"}), \
F(type_unknown, {"type", "unknown"})) \
M(tiflash_storage_s3_request_seconds, "S3 request duration in seconds", Histogram, \
F(type_put_object, {{"type", "put_object"}}, ExpBuckets{0.001, 2, 20}), \
F(type_copy_object, {{"type", "copy_object"}}, ExpBuckets{0.001, 2, 20}), \
Expand All @@ -331,6 +349,11 @@ namespace DB
F(type_list_objects, {{"type", "list_objects"}}, ExpBuckets{0.001, 2, 20}), \
F(type_delete_object, {{"type", "delete_object"}}, ExpBuckets{0.001, 2, 20}), \
F(type_head_object, {{"type", "head_object"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_storage_s3_gc_status, "S3 GC status", Gauge, \
F(type_lifecycle_added, {{"type", "lifecycle_added"}}), \
F(type_lifecycle_failed, {{"type", "lifecycle_failed"}}), \
F(type_owner, {{"type", "owner"}}), \
F(type_running, {{"type", "running"}})) \
M(tiflash_storage_s3_gc_seconds, "S3 GC subprocess duration in seconds", \
Histogram, /* these command usually cost several seconds, increase the start bucket to 500ms */ \
F(type_total, {{"type", "total"}}, ExpBuckets{0.5, 2, 20}), \
Expand All @@ -339,11 +362,7 @@ namespace DB
F(type_clean_locks, {{"type", "clean_locks"}}, ExpBuckets{0.5, 2, 20}), \
F(type_clean_manifests, {{"type", "clean_manifests"}}, ExpBuckets{0.5, 2, 20}), \
F(type_scan_then_clean_data_files, {{"type", "scan_then_clean_data_files"}}, ExpBuckets{0.5, 2, 20}), \
F(type_clean_one_lock, {{"type", "clean_one_lock"}}, ExpBuckets{0.5, 2, 20})) \
M(tiflash_storage_checkpoint_seconds, "PageStorage checkpoint elapsed time", Histogram, \
F(type_dump_checkpoint_snapshot, {{"type", "dump_checkpoint_snapshot"}}, ExpBuckets{0.001, 2, 20}), \
F(type_dump_checkpoint_data, {{"type", "dump_checkpoint_data"}}, ExpBuckets{0.001, 2, 20}), \
F(type_upload_checkpoint, {{"type", "upload_checkpoint"}}, ExpBuckets{0.001, 2, 20}))
F(type_clean_one_lock, {{"type", "clean_one_lock"}}, ExpBuckets{0.5, 2, 20}))

// clang-format on

Expand Down
1 change: 1 addition & 0 deletions dbms/src/Flash/Disaggregated/S3LockService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ bool S3LockService::tryMarkDeleteImpl(const String & data_file_key, disaggregate
{
const S3FilenameView key_view = S3FilenameView::fromKey(data_file_key);
RUNTIME_CHECK(key_view.isDataFile(), data_file_key);
GET_METRIC(tiflash_disaggregated_object_lock_request_count, type_delete).Increment();

if (!gc_owner->isOwner())
{
Expand Down
87 changes: 66 additions & 21 deletions dbms/src/Interpreters/AsynchronousMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
#include <Common/Allocator.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/TiFlashMetrics.h>
#include <Common/setThreadName.h>
#include <Common/typeid_cast.h>
#include <Core/TiFlashDisaggregatedMode.h>
#include <Databases/IDatabase.h>
#include <IO/UncompressedCache.h>
#include <Interpreters/AsynchronousMetrics.h>
Expand Down Expand Up @@ -125,27 +127,62 @@ static void calculateMaxAndSum(Max & max, Sum & sum, T x)

FileUsageStatistics AsynchronousMetrics::getPageStorageFileUsage()
{
RUNTIME_ASSERT(!(context.getSharedContextDisagg()->isDisaggregatedComputeMode() && context.getSharedContextDisagg()->use_autoscaler));
// Get from RegionPersister
auto & tmt = context.getTMTContext();
auto & kvstore = tmt.getKVStore();
FileUsageStatistics usage = kvstore->getFileUsageStatistics();

// Get the blob file status from all PS V3 instances
if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr)
FileUsageStatistics usage;
switch (context.getSharedContextDisagg()->disaggregated_mode)
{
const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics();
const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics();
const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics();
case DisaggregatedMode::None:
{
if (auto uni_ps = context.tryGetWriteNodePageStorage(); uni_ps != nullptr)
{
/// When format_version=5 is enabled, then all data are stored in the `uni_ps`
usage.merge(uni_ps->getFileUsageStatistics());
}
else
{
/// When format_version < 5, then there are multiple PageStorage instances

usage.merge(log_usage)
.merge(meta_usage)
.merge(data_usage);
}
// Get from RegionPersister
auto & tmt = context.getTMTContext();
auto & kvstore = tmt.getKVStore();
usage = kvstore->getFileUsageStatistics();

// Get the blob file status from all PS V3 instances
if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr)
{
const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics();
const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics();
const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics();

if (auto ps_cache = context.getSharedContextDisagg()->rn_page_cache_storage; ps_cache != nullptr)
usage.merge(log_usage)
.merge(meta_usage)
.merge(data_usage);
}
}
break;
}
case DisaggregatedMode::Storage:
{
// disagg write node, all data are stored in the `uni_ps`
if (auto uni_ps = context.getWriteNodePageStorage(); uni_ps != nullptr)
{
usage.merge(uni_ps->getFileUsageStatistics());
}
break;
}
case DisaggregatedMode::Compute:
{
usage.merge(ps_cache->getUniversalPageStorage()->getFileUsageStatistics());
// disagg compute node without auto-scaler, the proxy data are stored in the `uni_ps`
if (auto uni_ps = context.getWriteNodePageStorage(); uni_ps != nullptr)
{
usage.merge(uni_ps->getFileUsageStatistics());
}
// disagg compute node, all cache page data are stored in the `ps_cache`
if (auto ps_cache = context.getSharedContextDisagg()->rn_page_cache_storage; ps_cache != nullptr)
{
usage.merge(ps_cache->getUniversalPageStorage()->getFileUsageStatistics());
}
break;
}
}

return usage;
Expand Down Expand Up @@ -206,7 +243,6 @@ void AsynchronousMetrics::update()
set("MaxDTBackgroundTasksLength", max_dt_background_tasks_length);
}

if (!(context.getSharedContextDisagg()->isDisaggregatedComputeMode() && context.getSharedContextDisagg()->use_autoscaler))
{
const FileUsageStatistics usage = getPageStorageFileUsage();
set("BlobFileNums", usage.total_file_num);
Expand All @@ -217,6 +253,15 @@ void AsynchronousMetrics::update()
set("PagesInMem", usage.num_pages);
}

if (context.getSharedContextDisagg()->isDisaggregatedStorageMode())
{
auto & tmt = context.getTMTContext();
if (auto s3_gc_owner = tmt.getS3GCOwnerManager(); s3_gc_owner->isOwner())
{
GET_METRIC(tiflash_storage_s3_gc_status, type_owner).Set(1.0);
}
}

#if USE_MIMALLOC
#define MI_STATS_SET(X) set("mimalloc." #X, X)

Expand Down Expand Up @@ -256,7 +301,7 @@ void AsynchronousMetrics::update()
M("background_thread.num_runs", uint64_t) \
M("background_thread.run_interval", uint64_t)

#define GET_METRIC(NAME, TYPE) \
#define GET_JEMALLOC_METRIC(NAME, TYPE) \
do \
{ \
TYPE value{}; \
Expand All @@ -265,9 +310,9 @@ void AsynchronousMetrics::update()
set("jemalloc." NAME, value); \
} while (0);

FOR_EACH_METRIC(GET_METRIC);
FOR_EACH_METRIC(GET_JEMALLOC_METRIC);

#undef GET_METRIC
#undef GET_JEMALLOC_METRIC
#undef FOR_EACH_METRIC
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Server/StorageConfigParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct StorageRemoteCacheConfig
String dir;
UInt64 capacity = 0;
UInt64 dtfile_level = 100;
double delta_rate = 0.3;
double delta_rate = 0.1;

bool isCacheEnabled() const;
void initCacheDir() const;
Expand Down
5 changes: 5 additions & 0 deletions dbms/src/Storages/Page/PageConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,15 @@ static constexpr UInt64 GB = MB * 1024;

// Category tag for page data.
// The numeric values are used directly as array indices by code that iterates
// over [0, _MAX_STORAGE_TYPE_), so they must stay contiguous starting at 0.
// NOTE(review): the values may also be persisted elsewhere -- confirm before renumbering.
enum class StorageType
{
Unknown = 0,
Log = 1,
Data = 2,
Meta = 3,
KVStore = 4,
RaftEngine = 5,
KVEngine = 6,

// Sentinel: one past the last real value, not a valid storage type. Keep it last.
_MAX_STORAGE_TYPE_, // NOLINT(bugprone-reserved-identifier)
};

enum class PageStorageRunMode : UInt8
Expand Down
6 changes: 0 additions & 6 deletions dbms/src/Storages/Page/V3/CheckpointFile/CPDataFileStat.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,6 @@
namespace DB::PS::V3
{

struct CPDataWriteStats
{
bool has_new_data = false;
size_t incremental_data_bytes = 0;
size_t compact_data_bytes = 0;
};

using RemoteFileValidSizes = std::unordered_map<String, size_t>;

Expand Down
85 changes: 85 additions & 0 deletions dbms/src/Storages/Page/V3/CheckpointFile/CPDumpStat.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2023 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/TiFlashMetrics.h>
#include <Storages/Page/PageConstants.h>
#include <Storages/Page/V3/CheckpointFile/CPDumpStat.h>
#include <fmt/core.h>

namespace DB::PS::V3
{

void SetMetrics(const CPDataDumpStats & stats)
{
for (size_t i = 0; i < static_cast<size_t>(DB::StorageType::_MAX_STORAGE_TYPE_); ++i)
{
auto type = static_cast<DB::StorageType>(i);
switch (type)
{
case DB::StorageType::Unknown:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_unknown).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_unknown).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_unknown).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::RaftEngine:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_raftengine).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_raftengine).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_raftengine).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::KVEngine:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_kvengine).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_kvengine).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_kvengine).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::KVStore:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_kvstore).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_kvstore).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_kvstore).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::Data:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_data).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_data).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_data).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::Log:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_log).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_log).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_log).Set(stats.num_existing_bytes[i]);
break;
}
case DB::StorageType::Meta:
{
GET_METRIC(tiflash_storage_checkpoint_keys_by_types, type_meta).Increment(stats.num_keys[i]);
GET_METRIC(tiflash_storage_checkpoint_flow_by_types, type_meta).Increment(stats.num_bytes[i]);
GET_METRIC(tiflash_storage_page_data_by_types, type_meta).Set(stats.num_existing_bytes[i]);
break;
}
default:
__builtin_unreachable();
}
}
}

} // namespace DB::PS::V3
Loading