core/memory: Introduce heap profiler
Records allocation sites and sizes so that it's later possible to see
what kind of objects are taking up space on the heap.

Adds 8 bytes of memory overhead per live object when compiled in.
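
Where those 8 bytes go, mirroring the object_size_with_alloc_site() logic
added to core/memory.cc below (the concrete sizes are only an illustration,
not part of this patch):

  // allocation_site_ptr is a plain pointer, i.e. 8 bytes on x86-64:
  static_assert(sizeof(void*) == 8, "one pointer of overhead per live object");
  // A small allocation of, say, 24 bytes is padded to 24 + 8 = 32 bytes,
  // with the trailing 8 bytes holding the allocation_site_ptr.
  // Objects from page-aligned size classes, and large (multi-page)
  // allocations, store the pointer in the page metadata (page::alloc_site)
  // instead of next to the object.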

Not compiled in by default. To compile it in, configure with SEASTAR_HEAPPROF:

  ./configure.py --cflags=-DSEASTAR_HEAPPROF

To enable recording of allocation sites, start the seastar application with
the --heapprof flag:

  build/release/memcached --heapprof

It is also possible to enable recording at run time, for instance through a
RESTful API; a sketch follows below.
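
A minimal sketch (not part of this patch): only memory::set_heap_profiling_enabled()
is introduced by this commit (core/memory.hh); the helper below and the use of
smp::invoke_on_all are illustrative scaffolding that an admin or REST handler
could call:

  #include "core/memory.hh"
  #include "core/reactor.hh"

  // Hypothetical admin hook: enable backtrace collection on every shard.
  // set_heap_profiling_enabled() flips the thread-local
  // cpu_mem.collect_backtrace flag, so it has to run on each shard that
  // should record allocations.
  future<> enable_heap_profiling_on_all_shards() {
      return smp::invoke_on_all([] {
          memory::set_heap_profiling_enabled(true);
      });
  }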

scylla-gdb.py support will follow, with text-mode tree presentation
and dumping to flamegraphs.

Flamegraph example:

  https://cloud.githubusercontent.com/assets/283695/18888026/f69fc4e6-84f6-11e6-9b7b-305667d30f52.png

Text example:

(gdb) scylla heapprof -r
All (275954028, #12783)
 |-- void* memory::cpu_pages::allocate_large_and_trim<memory::cpu_pages::allocate_large_aligned(unsigned int, unsigned int)::{lambda(unsigned int, unsigned int)#1}>(unsigned int, memory::cpu_pages::allocate_large_aligned(unsigned int, unsigned int)::{lambda(unsigned int, unsigned int)#1}) + 169  (268959744, #5)
 |   memory::allocate_large_aligned(unsigned long, unsigned long) + 87
 |    |-- logalloc::segment_zone::segment_zone() + 291  (268435456, #1)
 |    |   logalloc::segment_pool::allocate_segment() + 413
 |    |   logalloc::segment_pool::segment_pool() + 296
 |    |   __tls_init.part.787 + 72
 |    |   logalloc::region_group::release_requests() + 1333
 |    |   logalloc::region_group::add(logalloc::region_group*) + 514
 |    |   database::database(db::config const&) + 4246
 |    |   (...)
 |    |
 |    \-- memory::allocate_aligned(unsigned long, unsigned long) + 13  (524288, #4)
 |         |-- memalign + 9  (262144, #2)
 |         |   db::commitlog::segment_manager::acquire_buffer(unsigned long) + 90
 |         |   db::commitlog::segment::new_buffer(unsigned long) + 113
 |         |   db::commitlog::segment::allocate(utils::UUID const&, shared_ptr<db::commitlog::entry_writer>) + 2347
 |         |    |-- db::commitlog::add_entry(utils::UUID const&, commitlog_entry_writer const&) + 1620  (131072, #1)
 |         |    |   database::do_apply(lw_shared_ptr<schema const>, frozen_mutation const&) + 182
 |         |    |   database::apply(lw_shared_ptr<schema const>, frozen_mutation const&) + 235
 |         |    |   service::storage_proxy::mutate_locally(lw_shared_ptr<schema const> const&, frozen_mutation const&) + 434

Message-Id: <[email protected]>
tgrabiec authored and avikivity committed Oct 1, 2016
1 parent b752610 commit e9ab0a3
Showing 3 changed files with 234 additions and 5 deletions.
228 changes: 224 additions & 4 deletions core/memory.cc
@@ -71,12 +71,49 @@
#include <cstring>
#include <boost/intrusive/list.hpp>
#include <sys/mman.h>
#include "util/defer.hh"
#include "util/backtrace.hh"

#ifdef HAVE_NUMA
#include <numaif.h>
#endif

struct allocation_site {
size_t count = 0; // number of live objects allocated at backtrace.
size_t size = 0; // amount of bytes in live objects allocated at backtrace.
allocation_site* next = nullptr;
std::vector<uintptr_t> backtrace;

bool operator==(const allocation_site& o) const {
return backtrace == o.backtrace;
}

bool operator!=(const allocation_site& o) const {
return !(*this == o);
}
};

namespace std {

template<>
struct hash<::allocation_site> {
size_t operator()(const ::allocation_site& bi) const {
size_t h = 0;
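// Combine the frames as h = 31*h ^ addr ((h << 5) - h == 31*h), a simple
// multiplicative hash over the backtrace addresses.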
for (auto addr : bi.backtrace) {
h = ((h << 5) - h) ^ addr;
}
return h;
}
};

}

using allocation_site_ptr = allocation_site*;

namespace memory {

static allocation_site_ptr get_allocation_site() __attribute__((unused));

static std::atomic<bool> abort_on_allocation_failure{false};

void enable_abort_on_allocation_failure() {
@@ -145,6 +182,14 @@ static char* mem_base() {
return known;
}

constexpr bool is_page_aligned(size_t size) {
return (size & (page_size - 1)) == 0;
}

constexpr size_t next_page_aligned(size_t size) {
return (size + (page_size - 1)) & ~(page_size - 1);
}

class small_pool;

struct free_object {
@@ -159,6 +204,9 @@ struct page {
page_list_link link;
small_pool* pool; // if used in a small_pool
free_object* freelist;
#ifdef SEASTAR_HEAPPROF
allocation_site_ptr alloc_site; // for objects whose size is multiple of page size, valid for head only
#endif
};

class page_list {
@@ -229,8 +277,10 @@ class small_pool {
void* allocate();
void deallocate(void* object);
unsigned object_size() const { return _object_size; }
bool objects_page_aligned() const { return is_page_aligned(_object_size); }
static constexpr unsigned size_to_idx(unsigned size);
static constexpr unsigned idx_to_size(unsigned idx);
allocation_site_ptr& alloc_site_holder(void* ptr);
private:
void add_more_objects();
void trim_free_list();
@@ -283,6 +333,31 @@ class small_pool_array {
static constexpr size_t max_small_allocation
= small_pool::idx_to_size(small_pool_array::nr_small_pools - 1);

constexpr size_t object_size_with_alloc_site(size_t size) {
#ifdef SEASTAR_HEAPPROF
// For page-aligned sizes, allocation_site* lives in page::alloc_site, not with the object.
static_assert(is_page_aligned(max_small_allocation), "assuming that max_small_allocation is page aligned so that we"
" don't need to add allocation_site_ptr to objects of size close to it");
size_t next_page_aligned_size = next_page_aligned(size);
if (next_page_aligned_size - size > sizeof(allocation_site_ptr)) {
size += sizeof(allocation_site_ptr);
} else {
return next_page_aligned_size;
}
#endif
return size;
}

#ifdef SEASTAR_HEAPPROF
// Ensure that object_size_with_alloc_site() does not exceed max_small_allocation
static_assert(object_size_with_alloc_site(max_small_allocation) == max_small_allocation, "");
static_assert(object_size_with_alloc_site(max_small_allocation - 1) == max_small_allocation, "");
static_assert(object_size_with_alloc_site(max_small_allocation - sizeof(allocation_site_ptr) + 1) == max_small_allocation, "");
static_assert(object_size_with_alloc_site(max_small_allocation - sizeof(allocation_site_ptr)) == max_small_allocation, "");
static_assert(object_size_with_alloc_site(max_small_allocation - sizeof(allocation_site_ptr) - 1) == max_small_allocation - 1, "");
static_assert(object_size_with_alloc_site(max_small_allocation - sizeof(allocation_site_ptr) - 2) == max_small_allocation - 2, "");
#endif

struct cross_cpu_free_item {
cross_cpu_free_item* next;
};
@@ -314,6 +389,18 @@ struct cpu_pages {
alignas(cache_line_size) std::vector<physical_address> virt_to_phys_map;
static std::atomic<unsigned> cpu_id_gen;
static cpu_pages* all_cpus[max_cpus];
union asu {
using alloc_sites_type = std::unordered_set<std::unique_ptr<allocation_site>,
indirect_hash<std::unique_ptr<allocation_site>>,
indirect_equal_to<std::unique_ptr<allocation_site>>>;
asu() {
new (&alloc_sites) alloc_sites_type();
}
~asu() {} // alloc_sites live forever
alloc_sites_type alloc_sites;
} asu;
allocation_site* alloc_site_list_head = nullptr; // For easy traversal of asu.alloc_sites from scylla-gdb.py
bool collect_backtrace = false;
char* mem() { return memory; }

void link(page_list& list, page* span);
@@ -362,6 +449,20 @@ static thread_local cpu_pages cpu_mem;
std::atomic<unsigned> cpu_pages::cpu_id_gen;
cpu_pages* cpu_pages::all_cpus[max_cpus];

void set_heap_profiling_enabled(bool enable) {
bool is_enabled = cpu_mem.collect_backtrace;
if (enable) {
if (!is_enabled) {
seastar_logger.info("Enabling heap profiler");
}
} else {
if (is_enabled) {
seastar_logger.info("Disabling heap profiler");
}
}
cpu_mem.collect_backtrace = enable;
}

// Free spans are stored in the largest index i such that nr_pages >= 1 << i.
static inline
unsigned index_of(unsigned pages) {
@@ -492,6 +593,14 @@ cpu_pages::allocate_large_and_trim(unsigned n_pages, Trimmer trimmer) {
span->free = span_end->free = false;
span->span_size = span_end->span_size = t.nr_pages;
span->pool = nullptr;
#ifdef SEASTAR_HEAPPROF
auto alloc_site = get_allocation_site();
span->alloc_site = alloc_site;
if (alloc_site) {
++alloc_site->count;
alloc_site->size += span->span_size * page_size;
}
#endif
if (nr_free_pages < current_min_free_pages) {
drain_cross_cpu_freelist();
run_reclaimers(reclaimer_scope::sync);
@@ -516,25 +625,107 @@ cpu_pages::allocate_large_aligned(unsigned align_pages, unsigned n_pages) {
});
}

#ifdef SEASTAR_HEAPPROF

class disable_backtrace_temporarily {
bool _old;
public:
disable_backtrace_temporarily() {
_old = cpu_mem.collect_backtrace;
cpu_mem.collect_backtrace = false;
}
~disable_backtrace_temporarily() {
cpu_mem.collect_backtrace = _old;
}
};

#else

struct disable_backtrace_temporarily {};

#endif

static
std::vector<uintptr_t> get_backtrace() noexcept {
disable_backtrace_temporarily dbt;
std::vector<uintptr_t> result;
backtrace([&result] (uintptr_t addr) {
result.push_back(addr);
});
return result;
}

static
allocation_site_ptr get_allocation_site() {
if (!cpu_mem.is_initialized() || !cpu_mem.collect_backtrace) {
return nullptr;
}
disable_backtrace_temporarily dbt;
auto new_alloc_site = std::make_unique<allocation_site>();
new_alloc_site->backtrace = get_backtrace();
auto insert_result = cpu_mem.asu.alloc_sites.insert(std::move(new_alloc_site));
allocation_site_ptr alloc_site = insert_result.first->get();
if (insert_result.second) {
alloc_site->next = cpu_mem.alloc_site_list_head;
cpu_mem.alloc_site_list_head = alloc_site;
}
return alloc_site;
}

allocation_site_ptr&
small_pool::alloc_site_holder(void* ptr) {
if (objects_page_aligned()) {
return cpu_mem.to_page(ptr)->alloc_site;
} else {
return *reinterpret_cast<allocation_site_ptr*>(reinterpret_cast<char*>(ptr) + _object_size - sizeof(allocation_site_ptr));
}
}

void*
cpu_pages::allocate_small(unsigned size) {
auto idx = small_pool::size_to_idx(size);
auto& pool = small_pools[idx];
assert(size <= pool.object_size());
return pool.allocate();
auto ptr = pool.allocate();
#ifdef SEASTAR_HEAPPROF
if (!ptr) {
return nullptr;
}
allocation_site* alloc_site = get_allocation_site();
if (alloc_site) {
++alloc_site->count;
alloc_site->size += pool.object_size();
}
pool.alloc_site_holder(ptr) = alloc_site;
#endif
return ptr;
}

void cpu_pages::free_large(void* ptr) {
pageidx idx = (reinterpret_cast<char*>(ptr) - mem()) / page_size;
page* span = &pages[idx];
#ifdef SEASTAR_HEAPPROF
auto alloc_site = span->alloc_site;
if (alloc_site) {
--alloc_site->count;
alloc_site->size -= span->span_size * page_size;
}
#endif
free_span(idx, span->span_size);
}

size_t cpu_pages::object_size(void* ptr) {
pageidx idx = (reinterpret_cast<char*>(ptr) - mem()) / page_size;
page* span = &pages[idx];
if (span->pool) {
return span->pool->object_size();
auto s = span->pool->object_size();
#ifdef SEASTAR_HEAPPROF
// We must not allow the object to be extended onto the allocation_site_ptr field.
if (!span->pool->objects_page_aligned()) {
s -= sizeof(allocation_site_ptr);
}
#endif
return s;
} else {
return size_t(span->span_size) * page_size;
}
@@ -572,7 +763,15 @@ bool cpu_pages::drain_cross_cpu_freelist() {
void cpu_pages::free(void* ptr) {
page* span = to_page(ptr);
if (span->pool) {
span->pool->deallocate(ptr);
small_pool& pool = *span->pool;
#ifdef SEASTAR_HEAPPROF
allocation_site* alloc_site = pool.alloc_site_holder(ptr);
if (alloc_site) {
--alloc_site->count;
alloc_site->size -= pool.object_size();
}
#endif
pool.deallocate(ptr);
} else {
free_large(ptr);
}
@@ -584,7 +783,15 @@ void cpu_pages::free(void* ptr, size_t size) {
size = sizeof(free_object);
}
if (size <= max_small_allocation) {
size = object_size_with_alloc_site(size);
auto pool = &small_pools[small_pool::size_to_idx(size)];
#ifdef SEASTAR_HEAPPROF
allocation_site* alloc_site = pool->alloc_site_holder(ptr);
if (alloc_site) {
--alloc_site->count;
alloc_site->size -= pool->object_size();
}
#endif
pool->deallocate(ptr);
} else {
free_large(ptr);
@@ -614,6 +821,13 @@ void cpu_pages::shrink(void* ptr, size_t new_size) {
if (new_size_pages == old_size_pages) {
return;
}
#ifdef SEASTAR_HEAPPROF
auto alloc_site = span->alloc_site;
if (alloc_site) {
alloc_site->size -= span->span_size * page_size;
alloc_site->size += new_size_pages * page_size;
}
#endif
span->span_size = new_size_pages;
span[new_size_pages - 1].free = false;
span[new_size_pages - 1].span_size = new_size_pages;
@@ -881,6 +1095,7 @@ small_pool::add_more_objects() {
}
}
while (_free_count < goal) {
disable_backtrace_temporarily dbt;
auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(_span_size));
if (!data) {
return;
@@ -967,6 +1182,7 @@ void* allocate(size_t size) {
}
void* ptr;
if (size <= max_small_allocation) {
size = object_size_with_alloc_site(size);
ptr = cpu_mem.allocate_small(size);
} else {
ptr = allocate_large(size);
@@ -987,7 +1203,7 @@ void* allocate_aligned(size_t align, size_t size) {
if (size <= max_small_allocation && align <= page_size) {
// Our small allocator only guarantees alignment for power-of-two
// allocations which are not larger than a page.
size = 1 << log2ceil(size);
size = 1 << log2ceil(object_size_with_alloc_site(size));
ptr = cpu_mem.allocate_small(size);
} else {
ptr = allocate_large_aligned(align, size);
@@ -1384,6 +1600,10 @@ void operator delete[](void* ptr, with_alignment wa) {

namespace memory {

void set_heap_profiling_enabled(bool enabled) {
seastar_logger.warn("Seastar compiled with default allocator, heap profiler not supported");
}

void enable_abort_on_allocation_failure() {
seastar_logger.warn("Seastar compiled with default allocator, will not abort on bad_alloc");
}
2 changes: 2 additions & 0 deletions core/memory.hh
@@ -62,6 +62,8 @@ void configure(std::vector<resource::memory> m,

void enable_abort_on_allocation_failure();

void set_heap_profiling_enabled(bool);

void* allocate_reclaimable(size_t size);

enum class reclaiming_result {