Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

feat: update the way to get heap profile #433

Merged
merged 16 commits into from
Apr 28, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 47 additions & 14 deletions src/dist/http/pprof_http_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@

#include "pprof_http_service.h"

#include <dsn/dist/fmt_logging.h>
#include <dsn/utility/string_conv.h>
#include <dsn/utility/defer.h>
#include <dsn/utility/timer.h>
#include <dsn/utility/string_splitter.h>
#include <gperftools/heap-profiler.h>
#include <gperftools/malloc_extension.h>
#include <gperftools/profiler.h>

Expand Down Expand Up @@ -319,26 +321,39 @@ void pprof_http_service::symbol_handler(const http_request &req, http_response &
// //
// == ip:port/pprof/heap == //
// //
void pprof_http_service::heap_handler(const http_request &req, http_response &resp)
{
bool in_pprof = false;
if (!_in_pprof_action.compare_exchange_strong(in_pprof, true)) {
dwarn_f("node is already exectuting pprof action, please wait and retry");
resp.status_code = http_status_code::internal_server_error;
Comment on lines +328 to +329
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dwarn_f("node is already exectuting pprof action, please wait and retry");
resp.status_code = http_status_code::internal_server_error;
resp.body = "node is already exectuting pprof action, please wait and retry";
resp.status_code = http_status_code::internal_server_error;

return;
}

static constexpr const char *TCMALLOC_SAMPLE_PARAMETER = "TCMALLOC_SAMPLE_PARAMETER";

static bool is_heap_profile_enabled() { return ::getenv(TCMALLOC_SAMPLE_PARAMETER) != nullptr; }
const std::string SECOND = "seconds";
neverchanje marked this conversation as resolved.
Show resolved Hide resolved
const uint32_t kDefaultSecond = 10;

static bool get_heap_profile(std::string &result)
{
if (!is_heap_profile_enabled()) {
result = "no TCMALLOC_SAMPLE_PARAMETER in env";
return false;
// get seconds from query params, default value is `kDefaultSecond`
uint32_t seconds = kDefaultSecond;
const auto iter = req.query_args.find(SECOND);
if (iter != req.query_args.end()) {
const auto seconds_str = iter->second;
dsn::internal::buf2unsigned(seconds_str, seconds);
}
MallocExtension::instance()->GetHeapSample(&result);
return true;
}

void pprof_http_service::heap_handler(const http_request &req, http_response &resp)
{
std::stringstream profile_name_prefix;
profile_name_prefix << "heap_profile." << getpid() << "." << dsn_now_ns();

HeapProfilerStart(profile_name_prefix.str().c_str());
sleep(seconds);
const char *profile = GetHeapProfile();
HeapProfilerStop();

resp.status_code = http_status_code::ok;
resp.body = profile;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Has the problem of the response being too large for the HTTP server to handle been solved?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have read the source code and found that GetHeapProfile returns a buffer of at most 1 MB.

// We use buffers of this size in DoGetHeapProfile.
static const int kProfileBufferSize = 1 << 20;

During my tests, I observed that the intermediate profile files never exceed 1 MB in size, which confirms this size limit.
Besides, I have skimmed the Google pprof source code and found that the pprof toolset collects several heap profile files, combines them into one file, and then serializes and compresses that file. I suppose this is how the pprof toolset obtains the whole profile.

delete profile;

get_heap_profile(resp.body);
_in_pprof_action.store(false);
}

// //
Expand Down Expand Up @@ -405,9 +420,18 @@ void pprof_http_service::cmdline_handler(const http_request &req, http_response

void pprof_http_service::growth_handler(const http_request &req, http_response &resp)
{
bool in_pprof = false;
if (!_in_pprof_action.compare_exchange_strong(in_pprof, true)) {
dwarn_f("node is already exectuting pprof action, please wait and retry");
resp.status_code = http_status_code::internal_server_error;
Comment on lines +425 to +426
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dwarn_f("node is already exectuting pprof action, please wait and retry");
resp.status_code = http_status_code::internal_server_error;
resp.body = "node is already exectuting pprof action, please wait and retry";
resp.status_code = http_status_code::internal_server_error;

return;
}

MallocExtension *malloc_ext = MallocExtension::instance();
ddebug("received requests for growth profile");
malloc_ext->GetHeapGrowthStacks(&resp.body);

_in_pprof_action.store(false);
}

// //
Expand Down Expand Up @@ -439,6 +463,13 @@ static bool get_cpu_profile(std::string &result, useconds_t seconds)

void pprof_http_service::profile_handler(const http_request &req, http_response &resp)
{
bool in_pprof = false;
if (!_in_pprof_action.compare_exchange_strong(in_pprof, true)) {
dwarn_f("node is already exectuting pprof action, please wait and retry");
resp.status_code = http_status_code::internal_server_error;
Comment on lines +468 to +469
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dwarn_f("node is already exectuting pprof action, please wait and retry");
resp.status_code = http_status_code::internal_server_error;
resp.body = "node is already exectuting pprof action, please wait and retry";
resp.status_code = http_status_code::internal_server_error;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that the point of filling an error message into the response body is to tell the client why the request failed. However, I tried this and found that pprof only prints the HTTP status code when it encounters an error, as shown below:

> go tool pprof -symbolize=none --seconds=5 http://<ip>:<port>/pprof/heap
Fetching profile over HTTP from http://<ip>:<port>/pprof/heap?seconds=5
Please wait... (5s)
http://<ip>:<port>/pprof/heap: server response: 500 Internal Server Error
failed to fetch any source profiles

I think we should print the error log on the server side; the HTTP status code is enough for the client.

return;
}

useconds_t seconds = 60000000;

const char *req_url = req.full_url.to_string().data();
Expand All @@ -461,6 +492,8 @@ void pprof_http_service::profile_handler(const http_request &req, http_response
resp.status_code = http_status_code::ok;

get_cpu_profile(resp.body, seconds);

_in_pprof_action.store(false);
}

} // namespace dsn
Expand Down
3 changes: 3 additions & 0 deletions src/dist/http/pprof_http_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class pprof_http_service : public http_service
void growth_handler(const http_request &req, http_response &resp);

void profile_handler(const http_request &req, http_response &resp);

private:
std::atomic_bool _in_pprof_action{false};
};

} // namespace dsn
Expand Down