Skip to content

Commit

Permalink
Copy just used pages from huge ANONYMOUS|NORESERVE mappings.
Browse files Browse the repository at this point in the history
ASan creates a "shadow" of the used memory.
This is done in a mapping of around 20 TB address space,
but most of it is not yet used.

This patch helps an ASan-enabled rr build in the following tests:
  nested_detach
  nested_detach_kill
  nested_detach_kill_stuck
  nested_detach_wait
  nested_release

Avoids error message:
  ERROR: AddressSanitizer: requested allocation size 0x20000000000 (0x20000001000 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0)

Changes in V2:
- Fallback if pagemap is not available in copy_mem_mapping_just_used.
- Allocate memory by std::vector instead of new.
- Attempt to improve readability.
- Better explain the intended behaviour of copy_mem_mapping_just_used.
- Added bit 62 (swap) like used in function process_execve.
- Add check for return value of pread.
- Change test to better exercise the handling of consecutive pages
  at the end of a mapping.
  • Loading branch information
bernhardu committed Apr 22, 2023
1 parent 5175548 commit 66f32e2
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,7 @@ set(BASIC_TESTS
daemon
desched_blocking_poll
desched_sigkill
detach_huge_mmap
detach_state
detach_threads
detach_sigkill
Expand Down
77 changes: 77 additions & 0 deletions src/Task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3696,6 +3696,73 @@ static void copy_mem_mapping(Task* from, Task* to, const KernelMapping& km) {
}
}

/**
 * Copy only those pages of |km| that are actually in use in Task |from|
 * over to Task |to|, instead of copying the mapping as a whole.
 *
 * The page state is taken from /proc/PID/pagemap of |from|. That file
 * consists of 64-bit values, each describing the state of one page, see
 * https://www.kernel.org/doc/Documentation/vm/pagemap.txt
 * A page is transferred if it is physically present (bit 63) or in swap
 * (bit 62). Runs of consecutive used pages are copied in one operation.
 *
 * Returns false, without copying anything, if the pagemap fd of |from| is
 * not open, so the caller can fall back to copying the whole mapping.
 * Returns true once all used pages have been copied. A failing pagemap
 * read is fatal.
 */
static bool copy_mem_mapping_just_used(Task* from, Task* to, const KernelMapping& km)
{
  ScopedFd& fd = from->pagemap_fd();
  if (!fd.is_open()) {
    LOG(debug) << "Failed to open " << from->proc_pagemap_path();
    return false;
  }

  const size_t pagesize = page_size();
  // Pagemap bit 63: page present in RAM, bit 62: page in swap.
  const uint64_t used_mask = (1ULL << 63) | (1ULL << 62);

  // Upper limit of pagemap entries fetched by a single pread.
#if defined(__i386__)
  const size_t buf_page_count = 64*1024;
#else
  const size_t buf_page_count = 1024*1024;
#endif

  // NOTE(review): km is expected to start and end on page boundaries.
  const size_t pages_total = km.size() / pagesize;

  // No need for a buffer bigger than the mapping has pages.
  vector<uint64_t> buf(pages_total < buf_page_count ? pages_total : buf_page_count);

  uint64_t pages_present = 0; // Just for logging

  size_t first_page = 0; // Index within km of the first page of the current chunk
  while (first_page < pages_total) {
    const size_t pages_left = pages_total - first_page;
    // Never ask for entries beyond the end of the mapping.
    const size_t want = pages_left < buf.size() ? pages_left : buf.size();
    const uintptr_t offset = first_page * pagesize;

    auto read_offset = ((km.start().as_int() + offset) / pagesize) * sizeof(uint64_t);
    auto read_count = want * sizeof(uint64_t);
    ssize_t bytes_read = pread(fd, buf.data(), read_count, read_offset);
    // Less than one complete entry counts as failure too, otherwise the
    // loop could not make progress.
    if (bytes_read < static_cast<ssize_t>(sizeof(uint64_t))) {
      FATAL() << "pread of pagemap fd failed, errno=" << errno;
    }

    // Only the entries returned by this pread are valid. Anything behind
    // them in buf is left over from an earlier chunk and must be ignored.
    const size_t got = static_cast<size_t>(bytes_read) / sizeof(uint64_t);

    for (size_t page = 0; page < got; ++page) {
      if (!(buf[page] & used_mask)) {
        continue;
      }
      auto start = km.start() + offset + page * pagesize;
      ++pages_present;

      // Extend the run over directly following used pages
      while (page + 1 < got && (buf[page + 1] & used_mask)) {
        ++page;
        ++pages_present;
      }

      auto end = km.start() + offset + pagesize * (page + 1);
      LOG(debug) << km << " copying start: 0x" << hex << start << " end: 0x" << end
                 << dec << " pages: " << (end - start) / pagesize;
      auto pages = km.subrange(start, end);
      copy_mem_mapping(from, to, pages);
    }

    // Continue after the entries actually read, so a short read does not
    // skip any pages.
    first_page += got;
  }
  LOG(debug) << km << " pages_present: " << pages_present << " pages_total: " << km.size() / pagesize;
  return true;
}

static void move_vdso_mapping(AutoRemoteSyscalls &remote, const KernelMapping &km) {
for (const auto& m : remote.task()->vm()->maps()) {
if (m.map.is_vdso() && m.map.start() != km.start()) {
Expand Down Expand Up @@ -3783,6 +3850,16 @@ void Task::dup_from(Task *other) {
create_mapping(this, remote_this, km);
LOG(debug) << "Copying mapping into " << tid;
if (!(km.flags() & MAP_SHARED)) {
// Make the effort just for bigger mappings, copy smaller as a whole.
if (km.flags() & (MAP_ANONYMOUS | MAP_NORESERVE) &&
km.size() >= 0x10000000/*256MB*/)
{
LOG(debug) << "Using copy_mem_mapping_just_used";
if (copy_mem_mapping_just_used(other, this, km)) {
continue;
}
LOG(debug) << "Fallback to copy_mem_mapping";
}
copy_mem_mapping(other, this, km);
}
}
Expand Down
74 changes: 74 additions & 0 deletions src/test/detach_huge_mmap.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "util_internal.h"

/* Byte value that test_alloc writes into every touched page and that the
 * child process expects to find there again. */
static const int magic = 0xab;
/* Size of each of the four test mappings. */
static uint64_t size = 0x10000000; /* 256 MB, at least the value in Task::dup_from */
/* System page size, set at the start of main via sysconf. */
static size_t page_size;
/* Addresses of all pages that got written by test_alloc. */
static void* pages[10];
static unsigned int idx; /*next index of pages*/

/* Cut the mapping at |mem| off from its successor by unmapping the page
 * directly behind its |size| bytes, then write the magic byte into the
 * second page of the mapping and into |count| consecutive pages beginning
 * at byte |offset|. Each written page is remembered in |pages|. */
void test_alloc(char* mem, unsigned int count, off_t offset) {
  const size_t capacity = sizeof(pages) / sizeof(pages[0]);
  char* target;
  unsigned int n;

  test_assert(munmap(mem + size, page_size) == 0);

  /* a single page close to the beginning */
  target = mem + page_size;
  test_assert(idx < capacity);
  pages[idx++] = target;
  memset(target, magic, page_size);

  /* |count| pages close to, or right at, the end */
  for (n = 0, target = mem + offset; n < count; n++, target += page_size) {
    test_assert(idx < capacity);
    pages[idx++] = target;
    memset(target, magic, page_size);
  }
}

/* Writes to a few pages inside four large MAP_NORESERVE mappings, forks,
 * lets the child detach from rr when running under rr, and verifies in the
 * child that every written page still holds the magic byte. The parent
 * prints EXIT-SUCCESS after the child has exited with status 0. */
int main(void) {
  page_size = sysconf(_SC_PAGESIZE);

  /* Create one big mapping, then break it up by munmap
   * into smaller ones, to better test the handling in
   * the end of mappings. */

  /* char* instead of void*, pointer arithmetic on void* is a GNU extension */
  char* mem1 = mmap(NULL, 4 * (size + page_size), PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  test_assert(mem1 != MAP_FAILED);

  char* mem2 = mem1 + size + page_size;
  char* mem3 = mem2 + size + page_size;
  char* mem4 = mem3 + size + page_size;

  test_alloc(mem1, 1, size - page_size);     /* one page used at last page */
  test_alloc(mem2, 1, size - page_size * 2); /* one page used before last page */
  test_alloc(mem3, 2, size - page_size * 2); /* two consecutive pages at last two pages */
  test_alloc(mem4, 2, size - page_size * 3); /* two consecutive pages before last page */

  pid_t pid = fork();
  /* Without a child there is nothing to wait for below. */
  test_assert(pid >= 0);
  if (pid == 0) {
    if (running_under_rr()) {
      rr_detach_teleport();
    }

    /* create one page for easier comparison */
    char* cmp = malloc(page_size);
    test_assert(cmp != NULL);
    memset(cmp, magic, page_size);

    /* check if the saved pages have the expected value */
    for (unsigned int i = 0; i < idx; i++) {
      test_assert(memcmp(pages[i], cmp, page_size) == 0);
    }

    return 0;
  }

  int status;
  /* status is only valid if wait really reaped our child */
  test_assert(pid == wait(&status));
  test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
  atomic_puts("EXIT-SUCCESS");
  return 0;
}

0 comments on commit 66f32e2

Please sign in to comment.