Skip to content

Commit

Permalink
aio: clean up and fix aio_setup_ring page mapping
Browse files Browse the repository at this point in the history
Since commit 36bc08c ("fs/aio: Add support to aio ring pages
migration") the aio ring setup code has used a special per-ring backing
inode for the page allocations, rather than just using random anonymous
pages.

However, rather than remembering the pages as it allocated them, it
would allocate the pages, insert them into the file mapping (dirty, so
that they couldn't be free'd), and then forget about them.  And then to
look them up again, it would mmap the mapping, and then use
"get_user_pages()" to get back an array of the pages we just created.

Now, not only is that incredibly inefficient, it also leaked all the
pages if the mmap failed (which could happen due to excessive number of
mappings, for example).

So clean it all up, making it much more straightforward.  Also remove
some left-overs of the previous (broken) mm_populate() usage that was
removed in commit d6c355c ("aio: fix race in ring buffer page
lookup introduced by page migration support") but left the pointless and
now misleading MAP_POPULATE flag around.

Tested-and-acked-by: Benjamin LaHaise <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
torvalds committed Dec 22, 2013
1 parent b7000ad commit 3dc9acb
Showing 1 changed file with 23 additions and 35 deletions.
58 changes: 23 additions & 35 deletions fs/aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ static int aio_setup_ring(struct kioctx *ctx)
struct aio_ring *ring;
unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm;
unsigned long size, populate;
unsigned long size, unused;
int nr_pages;
int i;
struct file *file;
Expand All @@ -347,6 +347,20 @@ static int aio_setup_ring(struct kioctx *ctx)
return -EAGAIN;
}

ctx->aio_ring_file = file;
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
/ sizeof(struct io_event);

ctx->ring_pages = ctx->internal_pages;
if (nr_pages > AIO_RING_PAGES) {
ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_KERNEL);
if (!ctx->ring_pages) {
put_aio_ring_file(ctx);
return -ENOMEM;
}
}

for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_inode->i_mapping,
Expand All @@ -358,19 +372,14 @@ static int aio_setup_ring(struct kioctx *ctx)
SetPageUptodate(page);
SetPageDirty(page);
unlock_page(page);

ctx->ring_pages[i] = page;
}
ctx->aio_ring_file = file;
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
/ sizeof(struct io_event);
ctx->nr_pages = i;

ctx->ring_pages = ctx->internal_pages;
if (nr_pages > AIO_RING_PAGES) {
ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_KERNEL);
if (!ctx->ring_pages) {
put_aio_ring_file(ctx);
return -ENOMEM;
}
if (unlikely(i != nr_pages)) {
aio_free_ring(ctx);
return -EAGAIN;
}

ctx->mmap_size = nr_pages * PAGE_SIZE;
Expand All @@ -379,37 +388,16 @@ static int aio_setup_ring(struct kioctx *ctx)
down_write(&mm->mmap_sem);
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, 0, &populate);
MAP_SHARED, 0, &unused);
up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
up_write(&mm->mmap_sem);
ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
}

pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);

/* We must do this while still holding mmap_sem for write, as we
* need to be protected against userspace attempting to mremap()
* or munmap() the ring buffer.
*/
ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
1, 0, ctx->ring_pages, NULL);

/* Dropping the reference here is safe as the page cache will hold
* onto the pages for us. It is also required so that page migration
* can unmap the pages and get the right reference count.
*/
for (i = 0; i < ctx->nr_pages; i++)
put_page(ctx->ring_pages[i]);

up_write(&mm->mmap_sem);

if (unlikely(ctx->nr_pages != nr_pages)) {
aio_free_ring(ctx);
return -EAGAIN;
}

ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */

Expand Down

0 comments on commit 3dc9acb

Please sign in to comment.