Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

feat: optimize tcmalloc release memory #343

Merged
merged 10 commits into from
Nov 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/dist/replication/common/replication_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ replication_options::replication_options()
config_sync_interval_ms = 30000;

mem_release_enabled = true;
mem_release_interval_ms = 86400000;
mem_release_check_interval_ms = 3600000;
mem_release_max_reserved_mem_percentage = 10;

lb_interval_ms = 10000;

Expand Down Expand Up @@ -479,11 +480,18 @@ void replication_options::initialize()
mem_release_enabled,
"whether to enable periodic memory release");

mem_release_interval_ms = (int)dsn_config_get_value_uint64(
mem_release_check_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_interval_ms",
mem_release_interval_ms,
"the replica releases its idle memory to the system every this period of time(ms)");
"mem_release_check_interval_ms",
mem_release_check_interval_ms,
"the replica check if should release memory to the system every this period of time(ms)");

mem_release_max_reserved_mem_percentage = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_max_reserved_mem_percentage",
mem_release_max_reserved_mem_percentage,
"if tcmalloc reserved but not-used memory exceed this percentage of application allocated "
"memory, replica server will release the exceeding memory back to operating system");

lb_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
Expand Down
3 changes: 2 additions & 1 deletion src/dist/replication/common/replication_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class replication_options
int32_t config_sync_interval_ms;

bool mem_release_enabled;
int32_t mem_release_interval_ms;
int32_t mem_release_check_interval_ms;
int32_t mem_release_max_reserved_mem_percentage;

int32_t lb_interval_ms;

Expand Down
92 changes: 81 additions & 11 deletions src/dist/replication/lib/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
_query_compact_command(nullptr),
_query_app_envs_command(nullptr),
_useless_dir_reserve_seconds_command(nullptr),
_max_reserved_memory_percentage_command(nullptr),
_deny_client(false),
_verbose_client_log(false),
_verbose_commit_log(false),
_gc_disk_error_replica_interval_seconds(3600),
_gc_disk_garbage_replica_interval_seconds(3600),
_mem_release_max_reserved_mem_percentage(10),
_learn_app_concurrent_count(0),
_fs_manager(false)
{
Expand Down Expand Up @@ -317,6 +319,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
_verbose_commit_log = _options.verbose_commit_log_on_start;
_gc_disk_error_replica_interval_seconds = _options.gc_disk_error_replica_interval_seconds;
_gc_disk_garbage_replica_interval_seconds = _options.gc_disk_garbage_replica_interval_seconds;
_mem_release_max_reserved_mem_percentage = _options.mem_release_max_reserved_mem_percentage;

// clear dirs if need
if (clear) {
Expand Down Expand Up @@ -638,17 +641,13 @@ void replica_stub::initialize_start()

#ifdef DSN_ENABLE_GPERF
if (_options.mem_release_enabled) {
_mem_release_timer_task =
tasking::enqueue_timer(LPC_MEM_RELEASE,
&_tracker,
[]() {
ddebug("Memory release has started...");
::MallocExtension::instance()->ReleaseFreeMemory();
ddebug("Memory release has ended...");
},
std::chrono::milliseconds(_options.mem_release_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_interval_ms));
_mem_release_timer_task = tasking::enqueue_timer(
LPC_MEM_RELEASE,
&_tracker,
std::bind(&replica_stub::gc_tcmalloc_memory, this),
std::chrono::milliseconds(_options.mem_release_check_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_check_interval_ms));
}
#endif

Expand Down Expand Up @@ -2053,6 +2052,35 @@ void replica_stub::open_service()
}
return result;
});

#ifdef DSN_ENABLE_GPERF
_max_reserved_memory_percentage_command = dsn::command_manager::instance().register_app_command(
hycdong marked this conversation as resolved.
Show resolved Hide resolved
{"mem-release-max-reserved-percentage"},
"mem-release-max-reserved-percentage [num | DEFAULT]",
"control tcmalloc max reserved but not-used memory percentage",
[this](const std::vector<std::string> &args) {
std::string result("OK");
if (args.empty()) {
// show current value
result = "mem-release-max-reserved-percentage = " +
std::to_string(_mem_release_max_reserved_mem_percentage);
return result;
}
if (args[0] == "DEFAULT") {
// set to default value
_mem_release_max_reserved_mem_percentage =
_options.mem_release_max_reserved_mem_percentage;
return result;
}
int32_t percentage = 0;
if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 || percentage >= 100) {
result = std::string("ERR: invalid arguments");
} else {
_mem_release_max_reserved_mem_percentage = percentage;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个 remote command 还是建议抽出来一个函数然后写单测

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 这几处代码都很类似,可以抽象出一个函数来, 几个remote command都复用。另起一个PR做吧

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

嗯,remote command整个可以抽出来,之后的pr再做吧

}
return result;
});
#endif
}

std::string
Expand Down Expand Up @@ -2178,6 +2206,9 @@ void replica_stub::close()
dsn::command_manager::instance().deregister_command(_query_compact_command);
dsn::command_manager::instance().deregister_command(_query_app_envs_command);
dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command);
#ifdef DSN_ENABLE_GPERF
dsn::command_manager::instance().deregister_command(_max_reserved_memory_percentage_command);
#endif

_kill_partition_command = nullptr;
_deny_client_command = nullptr;
Expand All @@ -2187,6 +2218,7 @@ void replica_stub::close()
_query_compact_command = nullptr;
_query_app_envs_command = nullptr;
_useless_dir_reserve_seconds_command = nullptr;
_max_reserved_memory_percentage_command = nullptr;

if (_config_sync_timer_task != nullptr) {
_config_sync_timer_task->cancel(true);
Expand Down Expand Up @@ -2304,6 +2336,44 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str
return child_dir;
}

#ifdef DSN_ENABLE_GPERF
// Get tcmalloc numeric property (name is "prop") value.
// Return -1 if get property failed (property we used will be greater than zero)
// Properties can be found in 'gperftools/malloc_extension.h'
static int64_t get_tcmalloc_numeric_property(const char *prop)
{
size_t value;
if (!::MallocExtension::instance()->GetNumericProperty(prop, &value)) {
derror_f("Failed to get tcmalloc property {}", prop);
return -1;
}
return value;
}

void replica_stub::gc_tcmalloc_memory()
{
int64_t total_allocated_bytes =
get_tcmalloc_numeric_property("generic.current_allocated_bytes");
int64_t reserved_bytes = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes");
if (total_allocated_bytes == -1 || reserved_bytes == -1) {
return;
}

int64_t max_reserved_bytes =
total_allocated_bytes * _mem_release_max_reserved_mem_percentage / 100.0;
if (reserved_bytes > max_reserved_bytes) {
int64_t release_bytes = reserved_bytes - max_reserved_bytes;
ddebug_f("Memory release started, almost {} bytes will be released", release_bytes);
while (release_bytes > 0) {
// tcmalloc releasing memory will lock page heap, release 1MB at a time to avoid locking
// page heap for long time
::MallocExtension::instance()->ReleaseToSystem(1024 * 1024);
release_bytes -= 1024 * 1024;
}
}
}
#endif

//
// partition split
//
Expand Down
7 changes: 7 additions & 0 deletions src/dist/replication/lib/replica_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
partition_status::type status,
error_code error);

#ifdef DSN_ENABLE_GPERF
// Try to release tcmalloc memory back to operating system
void gc_tcmalloc_memory();
#endif

private:
friend class ::dsn::replication::replication_checker;
friend class ::dsn::replication::test::test_checker;
Expand Down Expand Up @@ -305,12 +310,14 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
dsn_handle_t _query_compact_command;
dsn_handle_t _query_app_envs_command;
dsn_handle_t _useless_dir_reserve_seconds_command;
dsn_handle_t _max_reserved_memory_percentage_command;

bool _deny_client;
bool _verbose_client_log;
bool _verbose_commit_log;
int32_t _gc_disk_error_replica_interval_seconds;
int32_t _gc_disk_garbage_replica_interval_seconds;
int32_t _mem_release_max_reserved_mem_percentage;

// we limit LT_APP max concurrent count, because nfs service implementation is
// too simple, it do not support priority.
Expand Down