Skip to content

Commit

Permalink
mm: introduce mf_dax_kill_procs() for fsdax case
Browse files Browse the repository at this point in the history
This new function is a variant of mf_generic_kill_procs that accepts a
(file, offset) pair instead of a struct to support multiple files sharing a
DAX mapping.  It is intended to be called by the file systems as part of
the memory_failure handler after the file system performed a reverse
mapping from the storage address to the file and file offset.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Shiyang Ruan <[email protected]>
Reviewed-by: Dan Williams <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
Reviewed-by: Miaohe Lin <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Goldwyn Rodrigues <[email protected]>
Cc: Goldwyn Rodrigues <[email protected]>
Cc: Jane Chu <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Ritesh Harjani <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
irides authored and akpm00 committed Jul 18, 2022
1 parent 2f437ef commit c36e202
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 10 deletions.
2 changes: 2 additions & 0 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -3178,6 +3178,8 @@ enum mf_flags {
MF_UNPOISON = 1 << 4,
MF_SW_SIMULATED = 1 << 5,
};
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
Expand Down
96 changes: 86 additions & 10 deletions mm/memory-failure.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,10 +297,9 @@ void shake_page(struct page *p)
}
EXPORT_SYMBOL_GPL(shake_page);

static unsigned long dev_pagemap_mapping_shift(struct page *page,
struct vm_area_struct *vma)
static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
unsigned long address)
{
unsigned long address = vma_address(page, vma);
unsigned long ret = 0;
pgd_t *pgd;
p4d_t *p4d;
Expand Down Expand Up @@ -340,10 +339,14 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
/*
* Schedule a process for later kill.
* Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
*
* Notice: @fsdax_pgoff is used only when @p is a fsdax page.
* In other cases, such as anonymous and file-backed pages, the address to be
* killed can be calculated by @p itself.
*/
static void add_to_kill(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma,
struct list_head *to_kill)
pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
struct list_head *to_kill)
{
struct to_kill *tk;

Expand All @@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
}

tk->addr = page_address_in_vma(p, vma);
if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else
if (is_zone_device_page(p)) {
/*
* Since page->mapping is not used for fsdax, we need to
* calculate the address based on the vma.
*/
if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
} else
tk->size_shift = page_shift(compound_head(p));

/*
Expand Down Expand Up @@ -505,7 +514,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
if (!page_mapped_in_vma(page, vma))
continue;
if (vma->vm_mm == t->mm)
add_to_kill(t, page, vma, to_kill);
add_to_kill(t, page, 0, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
Expand Down Expand Up @@ -541,13 +550,41 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
* to be informed of all such data corruptions.
*/
if (vma->vm_mm == t->mm)
add_to_kill(t, page, vma, to_kill);
add_to_kill(t, page, 0, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
i_mmap_unlock_read(mapping);
}

#ifdef CONFIG_FS_DAX
/*
* Collect processes when the error hit a fsdax page.
*/
static void collect_procs_fsdax(struct page *page,
struct address_space *mapping, pgoff_t pgoff,
struct list_head *to_kill)
{
struct vm_area_struct *vma;
struct task_struct *tsk;

i_mmap_lock_read(mapping);
read_lock(&tasklist_lock);
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, true);

if (!t)
continue;
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
if (vma->vm_mm == t->mm)
add_to_kill(t, page, pgoff, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */

/*
* Collect the processes who have the corrupted page mapped to kill.
*/
Expand Down Expand Up @@ -1588,6 +1625,45 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags,
return rc;
}

#ifdef CONFIG_FS_DAX
/**
* mf_dax_kill_procs - Collect and kill processes who are using this file range
* @mapping: address_space of the file in use
* @index: start pgoff of the range within the file
* @count: length of the range, in unit of PAGE_SIZE
* @mf_flags: memory failure flags
*/
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags)
{
LIST_HEAD(to_kill);
dax_entry_t cookie;
struct page *page;
size_t end = index + count;

mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;

for (; index < end; index++) {
page = NULL;
cookie = dax_lock_mapping_entry(mapping, index, &page);
if (!cookie)
return -EBUSY;
if (!page)
goto unlock;

SetPageHWPoison(page);

collect_procs_fsdax(page, mapping, index, &to_kill);
unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
index, mf_flags);
unlock:
dax_unlock_mapping_entry(mapping, index, cookie);
}
return 0;
}
EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#endif /* CONFIG_FS_DAX */

/*
* Called from hugetlb code with hugetlb_lock held.
*
Expand Down

0 comments on commit c36e202

Please sign in to comment.