mm: convert i_mmap_mutex to rwsem
The i_mmap_mutex is a close cousin of the anon vma lock, both protecting
similar data, one for file backed pages and the other for anon memory.  To
this end, this lock can also be a rwsem.  In addition, there are some
important opportunities to share the lock when there are no tree
modifications.

This conversion is straightforward.  For now, all users take the write lock.

[sfr@canb.auug.org.au: update fremap.c]
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 83cde9e8ba
commit c8c06efa8b
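The changelog above boils down to swapping the lock type and its lock/unlock primitives while keeping every caller on the exclusive (write) path; the read-side sharing it mentions is left for later work. Below is a minimal userspace sketch of that pattern, with pthread_rwlock_t standing in for the kernel rw_semaphore. The demo struct, the i_mmap_lock_read()/i_mmap_unlock_read() pair, and main() are illustrative assumptions for this sketch, not code from the patch.

/*
 * Userspace analogue of the i_mmap_mutex -> i_mmap_rwsem conversion.
 * All "users" take the write lock, mirroring this patch; a read-side
 * helper is sketched only to show the sharing opportunity the
 * changelog mentions.
 */
#include <pthread.h>
#include <stdio.h>

struct address_space_demo {
        pthread_rwlock_t i_mmap_rwsem;  /* was conceptually: a mutex */
        int              nr_mappings;   /* stands in for the i_mmap rbtree */
};

/* Counterparts of the wrappers the patch adds to include/linux/fs.h. */
static void i_mmap_lock_write(struct address_space_demo *m)
{
        pthread_rwlock_wrlock(&m->i_mmap_rwsem);  /* kernel: down_write() */
}

static void i_mmap_unlock_write(struct address_space_demo *m)
{
        pthread_rwlock_unlock(&m->i_mmap_rwsem);  /* kernel: up_write() */
}

/* Hypothetical read side: lookups that do not modify the tree could share. */
static void i_mmap_lock_read(struct address_space_demo *m)
{
        pthread_rwlock_rdlock(&m->i_mmap_rwsem);
}

static void i_mmap_unlock_read(struct address_space_demo *m)
{
        pthread_rwlock_unlock(&m->i_mmap_rwsem);
}

int main(void)
{
        struct address_space_demo m = { .nr_mappings = 0 };

        pthread_rwlock_init(&m.i_mmap_rwsem, NULL);  /* kernel: mutex_init() became init_rwsem() */

        i_mmap_lock_write(&m);          /* tree modification: exclusive */
        m.nr_mappings++;
        i_mmap_unlock_write(&m);

        i_mmap_lock_read(&m);           /* pure lookup: could run concurrently */
        printf("mappings: %d\n", m.nr_mappings);
        i_mmap_unlock_read(&m);

        pthread_rwlock_destroy(&m.i_mmap_rwsem);
        return 0;
}

Built with cc demo.c -lpthread, the write path serializes modifications exactly as the mutex did, while the read path marks where concurrent lookups could later be allowed without any further data-structure changes.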
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -472,12 +472,12 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
 }
 
 /*
- * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
+ * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
  * be taken from reclaim -- unlike regular filesystems. This needs an
  * annotation because huge_pmd_share() does an allocation under
- * i_mmap_mutex.
+ * i_mmap_rwsem.
  */
-static struct lock_class_key hugetlbfs_i_mmap_mutex_key;
+static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
 
 static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                                         struct inode *dir,
@@ -495,8 +495,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                 struct hugetlbfs_inode_info *info;
                 inode->i_ino = get_next_ino();
                 inode_init_owner(inode, dir, mode);
-                lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
-                                &hugetlbfs_i_mmap_mutex_key);
+                lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
+                                &hugetlbfs_i_mmap_rwsem_key);
                 inode->i_mapping->a_ops = &hugetlbfs_aops;
                 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -346,7 +346,7 @@ void address_space_init_once(struct address_space *mapping)
         memset(mapping, 0, sizeof(*mapping));
         INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
         spin_lock_init(&mapping->tree_lock);
-        mutex_init(&mapping->i_mmap_mutex);
+        init_rwsem(&mapping->i_mmap_rwsem);
         INIT_LIST_HEAD(&mapping->private_list);
         spin_lock_init(&mapping->private_lock);
         mapping->i_mmap = RB_ROOT;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
 #include <linux/pid.h>
 #include <linux/bug.h>
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fiemap.h>
@@ -401,7 +402,7 @@ struct address_space {
         atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
         struct rb_root          i_mmap;         /* tree of private and shared mappings */
         struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
-        struct mutex            i_mmap_mutex;   /* protect tree, count, list */
+        struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
         /* Protected by tree_lock together with the radix tree */
         unsigned long           nrpages;        /* number of total pages */
         unsigned long           nrshadows;      /* number of shadow entries */
@@ -469,12 +470,12 @@ int mapping_tagged(struct address_space *mapping, int tag);
 
 static inline void i_mmap_lock_write(struct address_space *mapping)
 {
-        mutex_lock(&mapping->i_mmap_mutex);
+        down_write(&mapping->i_mmap_rwsem);
 }
 
 static inline void i_mmap_unlock_write(struct address_space *mapping)
 {
-        mutex_unlock(&mapping->i_mmap_mutex);
+        up_write(&mapping->i_mmap_rwsem);
 }
 
 /*
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -154,7 +154,7 @@ struct mmu_notifier_ops {
          * Therefore notifier chains can only be traversed when either
          *
          * 1. mmap_sem is held.
-         * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
+         * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
          * 3. No other concurrent thread can access the list (release)
          */
 struct mmu_notifier {
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -731,7 +731,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 
                 if (!prev && !more) {
                         /*
-                         * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+                         * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through
                          * reclaim. This is optimistic, no harm done if it fails.
                          */
                         prev = kmalloc(sizeof(struct map_info),
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -62,16 +62,16 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_mutex              (truncate_pagecache)
+ *  ->i_mmap_rwsem              (truncate_pagecache)
  *    ->private_lock            (__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock             (exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
  *  ->i_mutex
- *    ->i_mmap_mutex            (truncate->unmap_mapping_range)
+ *    ->i_mmap_rwsem            (truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
- *    ->i_mmap_mutex
+ *    ->i_mmap_rwsem
  *      ->page_table_lock or pte_lock   (various, mainly in memory.c)
  *        ->mapping->tree_lock  (arch-dependent flush_dcache_mmap_lock)
  *
@@ -85,7 +85,7 @@
  *    sb_lock                   (fs/fs-writeback.c)
  *    ->mapping->tree_lock      (__sync_single_inode)
  *
- *  ->i_mmap_mutex
+ *  ->i_mmap_rwsem
  *    ->anon_vma.lock           (vma_adjust)
  *
  *  ->anon_vma.lock
@@ -105,7 +105,7 @@
  *    ->inode->i_lock           (zap_pte_range->set_page_dirty)
  *    ->private_lock            (zap_pte_range->__set_page_dirty_buffers)
  *
- *  ->i_mmap_mutex
+ *  ->i_mmap_rwsem
  *    ->tasklist_lock           (memory_failure, collect_procs_ao)
  */
 
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2726,9 +2726,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
          * on its way out. We're lucky that the flag has such an appropriate
          * name, and can in fact be safely cleared here. We could clear it
          * before the __unmap_hugepage_range above, but all that's necessary
-         * is to clear it before releasing the i_mmap_mutex. This works
+         * is to clear it before releasing the i_mmap_rwsem. This works
          * because in the context this is called, the VMA is about to be
-         * destroyed and the i_mmap_mutex is held.
+         * destroyed and the i_mmap_rwsem is held.
          */
         vma->vm_flags &= ~VM_MAYSHARE;
 }
@@ -3370,9 +3370,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 spin_unlock(ptl);
         }
         /*
-         * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+         * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
          * may have cleared our pud entry and done put_page on the page table:
-         * once we release i_mmap_mutex, another task can do the final put_page
+         * once we release i_mmap_rwsem, another task can do the final put_page
          * and that page table be reused and filled with junk.
          */
         flush_tlb_range(vma, start, end);
@@ -3525,7 +3525,7 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
  * and returns the corresponding pte. While this is not necessary for the
  * !shared pmd case because we can allocate the pmd later as well, it makes the
  * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * pud has to be populated inside the same i_mmap_rwsem section - otherwise
  * racing tasks could either miss the sharing (see huge_pte_offset) or select a
  * bad pmd for sharing.
  */
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -232,7 +232,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_mutex
+ * Requires inode->i_mapping->i_mmap_rwsem
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                 struct file *file, struct address_space *mapping)
@@ -2791,7 +2791,7 @@ void exit_mmap(struct mm_struct *mm)
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_mutex is taken here.
+ * then i_mmap_rwsem is taken here.
  */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
@@ -3086,7 +3086,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                  */
                 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                         BUG();
-                mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
+                down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
         }
 }
 
@@ -3113,7 +3113,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
+ * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -99,7 +99,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         spinlock_t *old_ptl, *new_ptl;
 
         /*
-         * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
+         * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
          * locks to ensure that rmap will always observe either the old or the
          * new ptes. This is the easiest way to avoid races with
          * truncate_pagecache(), page migration, etc...
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
  * inode->i_mutex       (while writing or truncating, not reading or faulting)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_mutex
+ *       mapping->i_mmap_rwsem
  *         anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
@@ -1260,7 +1260,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 /*
                  * We need mmap_sem locking, Otherwise VM_LOCKED check makes
                  * unstable result and race. Plus, We can't wait here because
-                 * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
+                 * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
                  * if trylock failed, the page remain in evictable lru and later
                  * vmscan could retry to move the page to unevictable lru if the
                  * page is actually mlocked.
@@ -1684,7 +1684,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
          * The page lock not only makes sure that page->mapping cannot
          * suddenly be NULLified by truncation, it makes sure that the
          * structure at mapping cannot be freed and reused yet,
-         * so we can safely take mapping->i_mmap_mutex.
+         * so we can safely take mapping->i_mmap_rwsem.
          */
         VM_BUG_ON_PAGE(!PageLocked(page), page);
 