diff --git a/mm/mmap.c b/mm/mmap.c
index fba57c6286719..dd52d8764c2fd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2421,8 +2421,22 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr)
 
 	read_lock(&mm->mm_rb_lock);
 	vma = __find_vma(mm, addr);
-	if (vma)
-		atomic_inc(&vma->vm_ref_count);
+
+	/*
+	 * If there is a concurrent fast mremap, bail out since the entire
+	 * PMD/PUD subtree may have been remapped.
+	 *
+	 * This is usually safe for conventional mremap since it takes the
+	 * PTE locks as does SPF. However, fast mremap only takes the lock
+	 * at the PMD/PUD level, which is OK as it is done with the mmap
+	 * write lock held. But SPF, as the name implies, forgoes taking
+	 * the mmap read lock, and it cannot take the PTL at the larger
+	 * PMD/PUD granularity either, since that would introduce heavy
+	 * contention in the page fault path; so fall back to regular
+	 * fault handling.
+	 */
+	if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count))
+		vma = NULL;
 	read_unlock(&mm->mm_rb_lock);
 
 	return vma;
diff --git a/mm/mremap.c b/mm/mremap.c
index 5a18cec23fa70..b1263e9a16afe 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -210,11 +210,39 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
+{
+	/*
+	 * If we have the only reference, swap the refcount to -1. This
+	 * will prevent other concurrent references by get_vma() for SPFs.
+	 */
+	return atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1;
+}
+
 /*
- * Speculative page fault handlers will not detect page table changes done
- * without ptl locking.
+ * Restore the VMA reference count to 1 after a fast mremap.
  */
-#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
+static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
+{
+	/*
+	 * This should only be called after a corresponding,
+	 * successful trylock_vma_ref_count().
+	 */
+	VM_BUG_ON_VMA(atomic_cmpxchg(&vma->vm_ref_count, -1, 1) != -1,
+		      vma);
+}
+#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
+static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
+{
+	return true;
+}
+static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
+{
+}
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
+#ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
@@ -248,6 +276,14 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
 		return false;
 
+	/*
+	 * We hold both exclusive mmap_lock and rmap_lock at this point and
+	 * cannot block. If we cannot immediately take exclusive ownership
+	 * of the VMA, fall back to move_ptes().
+	 */
+	if (!trylock_vma_ref_count(vma))
+		return false;
+
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -270,6 +306,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
+	unlock_vma_ref_count(vma);
 	return true;
 }
 #else
@@ -281,11 +318,7 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
 }
 #endif
 
-/*
- * Speculative page fault handlers will not detect page table changes done
- * without ptl locking.
- */
-#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
+#ifdef CONFIG_HAVE_MOVE_PUD
 static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
 {
@@ -300,6 +333,14 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pud_none(*new_pud)))
 		return false;
 
+	/*
+	 * We hold both exclusive mmap_lock and rmap_lock at this point and
+	 * cannot block. If we cannot immediately take exclusive ownership
+	 * of the VMA, fall back to move_ptes().
+	 */
+	if (!trylock_vma_ref_count(vma))
+		return false;
+
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -322,6 +363,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
+	unlock_vma_ref_count(vma);
 	return true;
 }
 #else
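The refcount scheme the patch relies on can be summarised with a minimal, self-contained userspace sketch (an illustration only, not kernel code: the names ref_count, get_ref(), put_ref(), trylock_ref() and unlock_ref() are invented here, and C11 <stdatomic.h> stands in for the kernel's atomic_t API). A reader only takes a reference while the count is non-negative, mirroring atomic_inc_unless_negative() in get_vma(); the writer swaps a count of exactly 1 to -1 for the duration of the PMD/PUD move, mirroring trylock_vma_ref_count()/unlock_vma_ref_count().

/* Illustrative userspace model only; names and layout are not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int ref_count = 1;	/* models vma->vm_ref_count */

/* Reader side: models atomic_inc_unless_negative() in get_vma(). */
static bool get_ref(void)
{
	int old = atomic_load(&ref_count);

	while (old >= 0) {
		if (atomic_compare_exchange_weak(&ref_count, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;		/* "fast mremap" in progress: caller falls back */
}

static void put_ref(void)
{
	atomic_fetch_sub(&ref_count, 1);
}

/* Writer side: models trylock_vma_ref_count(), succeeds only for the sole reference. */
static bool trylock_ref(void)
{
	int expected = 1;

	return atomic_compare_exchange_strong(&ref_count, &expected, -1);
}

/* Models unlock_vma_ref_count(): restore the count once the move is done. */
static void unlock_ref(void)
{
	int expected = -1;

	atomic_compare_exchange_strong(&ref_count, &expected, 1);
}

int main(void)
{
	printf("trylock: %d\n", trylock_ref());		/* 1: we held the only reference */
	printf("reader during move: %d\n", get_ref());	/* 0: reader bails out */
	unlock_ref();
	printf("reader after move: %d\n", get_ref());	/* 1: reference taken again */
	put_ref();
	return 0;
}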