Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: optimize tcmalloc release memory (#343) #2

Merged
merged 1 commit into from
Nov 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/dist/replication/common/replication_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ replication_options::replication_options()
config_sync_interval_ms = 30000;

mem_release_enabled = true;
mem_release_interval_ms = 86400000;
mem_release_check_interval_ms = 3600000;
mem_release_max_reserved_mem_percentage = 10;

lb_interval_ms = 10000;

Expand Down Expand Up @@ -479,11 +480,18 @@ void replication_options::initialize()
mem_release_enabled,
"whether to enable periodic memory release");

mem_release_interval_ms = (int)dsn_config_get_value_uint64(
mem_release_check_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_interval_ms",
mem_release_interval_ms,
"the replica releases its idle memory to the system every this period of time(ms)");
"mem_release_check_interval_ms",
mem_release_check_interval_ms,
"the replica check if should release memory to the system every this period of time(ms)");

mem_release_max_reserved_mem_percentage = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_max_reserved_mem_percentage",
mem_release_max_reserved_mem_percentage,
"if tcmalloc reserved but not-used memory exceed this percentage of application allocated "
"memory, replica server will release the exceeding memory back to operating system");

lb_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
Expand Down
3 changes: 2 additions & 1 deletion src/dist/replication/common/replication_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class replication_options
int32_t config_sync_interval_ms;

bool mem_release_enabled;
int32_t mem_release_interval_ms;
int32_t mem_release_check_interval_ms;
int32_t mem_release_max_reserved_mem_percentage;

int32_t lb_interval_ms;

Expand Down
92 changes: 81 additions & 11 deletions src/dist/replication/lib/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
_query_compact_command(nullptr),
_query_app_envs_command(nullptr),
_useless_dir_reserve_seconds_command(nullptr),
_max_reserved_memory_percentage_command(nullptr),
_deny_client(false),
_verbose_client_log(false),
_verbose_commit_log(false),
_gc_disk_error_replica_interval_seconds(3600),
_gc_disk_garbage_replica_interval_seconds(3600),
_mem_release_max_reserved_mem_percentage(10),
_learn_app_concurrent_count(0),
_fs_manager(false)
{
Expand Down Expand Up @@ -317,6 +319,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
_verbose_commit_log = _options.verbose_commit_log_on_start;
_gc_disk_error_replica_interval_seconds = _options.gc_disk_error_replica_interval_seconds;
_gc_disk_garbage_replica_interval_seconds = _options.gc_disk_garbage_replica_interval_seconds;
_mem_release_max_reserved_mem_percentage = _options.mem_release_max_reserved_mem_percentage;

// clear dirs if need
if (clear) {
Expand Down Expand Up @@ -638,17 +641,13 @@ void replica_stub::initialize_start()

#ifdef DSN_ENABLE_GPERF
if (_options.mem_release_enabled) {
_mem_release_timer_task =
tasking::enqueue_timer(LPC_MEM_RELEASE,
&_tracker,
[]() {
ddebug("Memory release has started...");
::MallocExtension::instance()->ReleaseFreeMemory();
ddebug("Memory release has ended...");
},
std::chrono::milliseconds(_options.mem_release_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_interval_ms));
_mem_release_timer_task = tasking::enqueue_timer(
LPC_MEM_RELEASE,
&_tracker,
std::bind(&replica_stub::gc_tcmalloc_memory, this),
std::chrono::milliseconds(_options.mem_release_check_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_check_interval_ms));
}
#endif

Expand Down Expand Up @@ -2053,6 +2052,35 @@ void replica_stub::open_service()
}
return result;
});

#ifdef DSN_ENABLE_GPERF
_max_reserved_memory_percentage_command = dsn::command_manager::instance().register_app_command(
{"mem-release-max-reserved-percentage"},
"mem-release-max-reserved-percentage [num | DEFAULT]",
"control tcmalloc max reserved but not-used memory percentage",
[this](const std::vector<std::string> &args) {
std::string result("OK");
if (args.empty()) {
// show current value
result = "mem-release-max-reserved-percentage = " +
std::to_string(_mem_release_max_reserved_mem_percentage);
return result;
}
if (args[0] == "DEFAULT") {
// set to default value
_mem_release_max_reserved_mem_percentage =
_options.mem_release_max_reserved_mem_percentage;
return result;
}
int32_t percentage = 0;
if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 || percentage >= 100) {
result = std::string("ERR: invalid arguments");
} else {
_mem_release_max_reserved_mem_percentage = percentage;
}
return result;
});
#endif
}

std::string
Expand Down Expand Up @@ -2178,6 +2206,9 @@ void replica_stub::close()
dsn::command_manager::instance().deregister_command(_query_compact_command);
dsn::command_manager::instance().deregister_command(_query_app_envs_command);
dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command);
#ifdef DSN_ENABLE_GPERF
dsn::command_manager::instance().deregister_command(_max_reserved_memory_percentage_command);
#endif

_kill_partition_command = nullptr;
_deny_client_command = nullptr;
Expand All @@ -2187,6 +2218,7 @@ void replica_stub::close()
_query_compact_command = nullptr;
_query_app_envs_command = nullptr;
_useless_dir_reserve_seconds_command = nullptr;
_max_reserved_memory_percentage_command = nullptr;

if (_config_sync_timer_task != nullptr) {
_config_sync_timer_task->cancel(true);
Expand Down Expand Up @@ -2304,6 +2336,44 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str
return child_dir;
}

#ifdef DSN_ENABLE_GPERF
// Looks up the tcmalloc numeric property called `prop` and returns its value.
//
// The available property names are listed in 'gperftools/malloc_extension.h'.
// When the lookup fails an error is logged and -1 is returned; -1 works as a
// failure marker because the properties queried here are expected to be
// greater than zero.
static int64_t get_tcmalloc_numeric_property(const char *prop)
{
    size_t prop_value = 0;
    const bool found = ::MallocExtension::instance()->GetNumericProperty(prop, &prop_value);
    if (found) {
        return prop_value;
    }

    derror_f("Failed to get tcmalloc property {}", prop);
    return -1;
}

// Hands tcmalloc's reserved-but-unused memory back to the operating system
// once it grows past `_mem_release_max_reserved_mem_percentage` percent of the
// memory currently allocated by the application.
//
// Does nothing if either tcmalloc property cannot be read, or if the reserved
// amount is within the allowed threshold.
void replica_stub::gc_tcmalloc_memory()
{
    const int64_t allocated_bytes =
        get_tcmalloc_numeric_property("generic.current_allocated_bytes");
    const int64_t pageheap_free_bytes =
        get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes");
    // -1 is the helper's failure marker: without both figures no decision can be made.
    if (allocated_bytes == -1 || pageheap_free_bytes == -1) {
        return;
    }

    // Largest amount of free page-heap memory that is allowed to stay reserved.
    const int64_t reserve_limit =
        allocated_bytes * _mem_release_max_reserved_mem_percentage / 100.0;
    if (pageheap_free_bytes <= reserve_limit) {
        return;
    }

    // Releasing memory locks the tcmalloc page heap, so give it back in 1MB
    // slices instead of one big request to keep each lock hold short.
    const int64_t slice_bytes = 1024 * 1024;
    const int64_t excess_bytes = pageheap_free_bytes - reserve_limit;
    ddebug_f("Memory release started, almost {} bytes will be released", excess_bytes);
    for (int64_t remaining = excess_bytes; remaining > 0; remaining -= slice_bytes) {
        ::MallocExtension::instance()->ReleaseToSystem(slice_bytes);
    }
}
#endif

//
// partition split
//
Expand Down
7 changes: 7 additions & 0 deletions src/dist/replication/lib/replica_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
partition_status::type status,
error_code error);

#ifdef DSN_ENABLE_GPERF
// Try to release tcmalloc memory back to operating system
void gc_tcmalloc_memory();
#endif

private:
friend class ::dsn::replication::replication_checker;
friend class ::dsn::replication::test::test_checker;
Expand Down Expand Up @@ -305,12 +310,14 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
dsn_handle_t _query_compact_command;
dsn_handle_t _query_app_envs_command;
dsn_handle_t _useless_dir_reserve_seconds_command;
dsn_handle_t _max_reserved_memory_percentage_command;

bool _deny_client;
bool _verbose_client_log;
bool _verbose_commit_log;
int32_t _gc_disk_error_replica_interval_seconds;
int32_t _gc_disk_garbage_replica_interval_seconds;
int32_t _mem_release_max_reserved_mem_percentage;

// we limit LT_APP max concurrent count, because nfs service implementation is
// too simple, it do not support priority.
Expand Down