linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Laurent Dufour <ldufour@linux.ibm.com>
To: Suren Baghdasaryan <surenb@google.com>, akpm@linux-foundation.org
Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com,
	vbabka@suse.cz, hannes@cmpxchg.org, mgorman@suse.de,
	dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com,
	peterz@infradead.org, laurent.dufour@fr.ibm.com,
	paulmck@kernel.org, luto@kernel.org, songliubraving@fb.com,
	peterx@redhat.com, david@redhat.com, dhowells@redhat.com,
	hughd@google.com, bigeasy@linutronix.de,
	kent.overstreet@linux.dev, rientjes@google.com,
	axelrasmussen@google.com, joelaf@google.com, minchan@google.com,
	kernel-team@android.com, linux-mm@kvack.org,
	linux-arm-kernel@lists.infradead.org,
	linuxppc-dev@lists.ozlabs.org, x86@kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH RESEND 05/28] mm: add per-VMA lock and helper functions to control it
Date: Tue, 6 Sep 2022 15:46:56 +0200	[thread overview]
Message-ID: <c84136d3-703a-0e57-20ce-59f6b5823999@linux.ibm.com> (raw)
In-Reply-To: <20220901173516.702122-6-surenb@google.com>

Le 01/09/2022 à 19:34, Suren Baghdasaryan a écrit :
> Introduce a per-VMA rw_semaphore to be used during page fault handling
> instead of mmap_lock. Because there are cases when multiple VMAs need
> to be exclusively locked during VMA tree modifications, instead of the
> usual lock/unlock pattern we mark a VMA as locked by taking per-VMA lock
> exclusively and setting vma->vm_lock_seq to the current mm->mm_lock_seq. When
> mmap_write_lock holder is done with all modifications and drops mmap_lock,
> it will increment mm->mm_lock_seq, effectively unlocking all VMAs marked as
> locked.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Apart from a minor comment below,

Reviewed-by: Laurent Dufour <laurent.dufour@fr.ibm.com>

> ---
>  include/linux/mm.h        | 78 +++++++++++++++++++++++++++++++++++++++
>  include/linux/mm_types.h  |  7 ++++
>  include/linux/mmap_lock.h | 13 +++++++
>  kernel/fork.c             |  4 ++
>  mm/init-mm.c              |  3 ++
>  5 files changed, 105 insertions(+)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7d322a979455..476bf936c5f0 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -611,6 +611,83 @@ struct vm_operations_struct {
>  					  unsigned long addr);
>  };
>  
> +#ifdef CONFIG_PER_VMA_LOCK
> +static inline void vma_init_lock(struct vm_area_struct *vma)
> +{
> +	init_rwsem(&vma->lock);
> +	vma->vm_lock_seq = -1;
> +}
> +
> +static inline void vma_mark_locked(struct vm_area_struct *vma)
> +{
> +	int mm_lock_seq;
> +
> +	mmap_assert_write_locked(vma->vm_mm);
> +
> +	/*
> +	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
> +	 * mm->mm_lock_seq can't be concurrently modified.
> +	 */
> +	mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
> +	if (vma->vm_lock_seq == mm_lock_seq)
> +		return;
> +
> +	down_write(&vma->lock);
> +	vma->vm_lock_seq = mm_lock_seq;
> +	up_write(&vma->lock);
> +}
> +
> +static inline bool vma_read_trylock(struct vm_area_struct *vma)
> +{
> +	if (unlikely(down_read_trylock(&vma->lock) == 0))
> +		return false;
> +
> +	/*
> +	 * Overflow might produce false locked result but it's not critical.

It might be good to state explicitly here that, in the case of a false
"locked" result, the caller is expected to fall back to read-locking the
whole mm before making its change to that VMA.

> +	 * False unlocked result is critical but is impossible because we
> +	 * modify and check vma->vm_lock_seq under vma->lock protection and
> +	 * mm->mm_lock_seq modification invalidates all existing locks.
> +	 */
> +	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq)) {
> +		up_read(&vma->lock);
> +		return false;
> +	}
> +	return true;
> +}
> +
> +static inline void vma_read_unlock(struct vm_area_struct *vma)
> +{
> +	up_read(&vma->lock);
> +}
> +
> +static inline void vma_assert_locked(struct vm_area_struct *vma)
> +{
> +	lockdep_assert_held(&vma->lock);
> +	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->lock), vma);
> +}
> +
> +static inline void vma_assert_write_locked(struct vm_area_struct *vma, int pos)
> +{
> +	mmap_assert_write_locked(vma->vm_mm);
> +	/*
> +	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
> +	 * mm->mm_lock_seq can't be concurrently modified.
> +	 */
> +	VM_BUG_ON_VMA(vma->vm_lock_seq != READ_ONCE(vma->vm_mm->mm_lock_seq), vma);
> +}
> +
> +#else /* CONFIG_PER_VMA_LOCK */
> +
> +static inline void vma_init_lock(struct vm_area_struct *vma) {}
> +static inline void vma_mark_locked(struct vm_area_struct *vma) {}
> +static inline bool vma_read_trylock(struct vm_area_struct *vma)
> +		{ return false; }
> +static inline void vma_read_unlock(struct vm_area_struct *vma) {}
> +static inline void vma_assert_locked(struct vm_area_struct *vma) {}
> +static inline void vma_assert_write_locked(struct vm_area_struct *vma, int pos) {}
> +
> +#endif /* CONFIG_PER_VMA_LOCK */
> +
>  static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>  {
>  	static const struct vm_operations_struct dummy_vm_ops = {};
> @@ -619,6 +696,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>  	vma->vm_mm = mm;
>  	vma->vm_ops = &dummy_vm_ops;
>  	INIT_LIST_HEAD(&vma->anon_vma_chain);
> +	vma_init_lock(vma);
>  }
>  
>  static inline void vma_set_anonymous(struct vm_area_struct *vma)
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index bed25ef7c994..6a03f59c1e78 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -486,6 +486,10 @@ struct vm_area_struct {
>  	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
>  #endif
>  	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
> +#ifdef CONFIG_PER_VMA_LOCK
> +	struct rw_semaphore lock;
> +	int vm_lock_seq;
> +#endif
>  } __randomize_layout;
>  
>  struct kioctx_table;
> @@ -567,6 +571,9 @@ struct mm_struct {
>  					  * init_mm.mmlist, and are protected
>  					  * by mmlist_lock
>  					  */
> +#ifdef CONFIG_PER_VMA_LOCK
> +		int mm_lock_seq;
> +#endif
>  
>  
>  		unsigned long hiwater_rss; /* High-watermark of RSS usage */
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index e49ba91bb1f0..a391ae226564 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -72,6 +72,17 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
>  	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
>  }
>  
> +#ifdef CONFIG_PER_VMA_LOCK
> +static inline void vma_mark_unlocked_all(struct mm_struct *mm)
> +{
> +	mmap_assert_write_locked(mm);
> +	/* No races during update due to exclusive mmap_lock being held */
> +	WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1);
> +}
> +#else
> +static inline void vma_mark_unlocked_all(struct mm_struct *mm) {}
> +#endif
> +
>  static inline void mmap_init_lock(struct mm_struct *mm)
>  {
>  	init_rwsem(&mm->mmap_lock);
> @@ -114,12 +125,14 @@ static inline bool mmap_write_trylock(struct mm_struct *mm)
>  static inline void mmap_write_unlock(struct mm_struct *mm)
>  {
>  	__mmap_lock_trace_released(mm, true);
> +	vma_mark_unlocked_all(mm);
>  	up_write(&mm->mmap_lock);
>  }
>  
>  static inline void mmap_write_downgrade(struct mm_struct *mm)
>  {
>  	__mmap_lock_trace_acquire_returned(mm, false, true);
> +	vma_mark_unlocked_all(mm);
>  	downgrade_write(&mm->mmap_lock);
>  }
>  
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 614872438393..bfab31ecd11e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -475,6 +475,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
>  		 */
>  		*new = data_race(*orig);
>  		INIT_LIST_HEAD(&new->anon_vma_chain);
> +		vma_init_lock(new);
>  		new->vm_next = new->vm_prev = NULL;
>  		dup_anon_vma_name(orig, new);
>  	}
> @@ -1130,6 +1131,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>  	seqcount_init(&mm->write_protect_seq);
>  	mmap_init_lock(mm);
>  	INIT_LIST_HEAD(&mm->mmlist);
> +#ifdef CONFIG_PER_VMA_LOCK
> +	WRITE_ONCE(mm->mm_lock_seq, 0);
> +#endif
>  	mm_pgtables_bytes_init(mm);
>  	mm->map_count = 0;
>  	mm->locked_vm = 0;
> diff --git a/mm/init-mm.c b/mm/init-mm.c
> index fbe7844d0912..8399f90d631c 100644
> --- a/mm/init-mm.c
> +++ b/mm/init-mm.c
> @@ -37,6 +37,9 @@ struct mm_struct init_mm = {
>  	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
>  	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
>  	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
> +#ifdef CONFIG_PER_VMA_LOCK
> +	.mm_lock_seq	= 0,
> +#endif
>  	.user_ns	= &init_user_ns,
>  	.cpu_bitmap	= CPU_BITS_NONE,
>  #ifdef CONFIG_IOMMU_SVA


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  reply	other threads:[~2022-09-06 13:56 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-01 17:34 [RFC PATCH RESEND 00/28] per-VMA locks proposal Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 01/28] mm: introduce CONFIG_PER_VMA_LOCK Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 02/28] mm: rcu safe VMA freeing Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 03/28] mm: introduce __find_vma to be used without mmap_lock protection Suren Baghdasaryan
2022-09-01 20:22   ` Kent Overstreet
2022-09-01 23:18     ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 04/28] mm: move mmap_lock assert function definitions Suren Baghdasaryan
2022-09-01 20:24   ` Kent Overstreet
2022-09-01 20:51     ` Liam Howlett
2022-09-01 23:21       ` Suren Baghdasaryan
2022-09-02  6:23     ` Sebastian Andrzej Siewior
2022-09-02 17:46       ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 05/28] mm: add per-VMA lock and helper functions to control it Suren Baghdasaryan
2022-09-06 13:46   ` Laurent Dufour [this message]
2022-09-06 17:24     ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 06/28] mm: mark VMA as locked whenever vma->vm_flags are modified Suren Baghdasaryan
2022-09-06 14:26   ` Laurent Dufour
2022-09-06 19:00     ` Suren Baghdasaryan
2022-09-06 20:00       ` Liam Howlett
2022-09-06 20:13         ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 07/28] kernel/fork: mark VMAs as locked before copying pages during fork Suren Baghdasaryan
2022-09-06 14:37   ` Laurent Dufour
2022-09-08 23:57     ` Suren Baghdasaryan
2022-09-09 13:27       ` Laurent Dufour
2022-09-09 16:29         ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 08/28] mm/khugepaged: mark VMA as locked while collapsing a hugepage Suren Baghdasaryan
2022-09-06 14:43   ` Laurent Dufour
2022-09-09  0:15     ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 09/28] mm/mempolicy: mark VMA as locked when changing protection policy Suren Baghdasaryan
2022-09-06 14:47   ` Laurent Dufour
2022-09-09  0:27     ` Suren Baghdasaryan
2022-09-01 17:34 ` [RFC PATCH RESEND 10/28] mm/mmap: mark VMAs as locked in vma_adjust Suren Baghdasaryan
2022-09-06 15:35   ` Laurent Dufour
2022-09-09  0:51     ` Suren Baghdasaryan
2022-09-09 15:52       ` Laurent Dufour
2022-09-01 17:34 ` [RFC PATCH RESEND 11/28] mm/mmap: mark VMAs as locked before merging or splitting them Suren Baghdasaryan
2022-09-06 15:44   ` Laurent Dufour
2022-09-01 17:35 ` [RFC PATCH RESEND 12/28] mm/mremap: mark VMA as locked while remapping it to a new address range Suren Baghdasaryan
2022-09-06 16:09   ` Laurent Dufour
2022-09-01 17:35 ` [RFC PATCH RESEND 13/28] mm: conditionally mark VMA as locked in free_pgtables and unmap_page_range Suren Baghdasaryan
2022-09-09 10:33   ` Laurent Dufour
2022-09-09 16:43     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 14/28] mm: mark VMAs as locked before isolating them Suren Baghdasaryan
2022-09-09 13:35   ` Laurent Dufour
2022-09-09 16:28     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 15/28] mm/mmap: mark adjacent VMAs as locked if they can grow into unmapped area Suren Baghdasaryan
2022-09-09 13:43   ` Laurent Dufour
2022-09-09 16:25     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 16/28] kernel/fork: assert no VMA readers during its destruction Suren Baghdasaryan
2022-09-09 13:56   ` Laurent Dufour
2022-09-09 16:19     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 17/28] mm/mmap: prevent pagefault handler from racing with mmu_notifier registration Suren Baghdasaryan
2022-09-09 14:20   ` Laurent Dufour
2022-09-09 16:12     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 18/28] mm: add FAULT_FLAG_VMA_LOCK flag Suren Baghdasaryan
2022-09-09 14:26   ` Laurent Dufour
2022-09-01 17:35 ` [RFC PATCH RESEND 19/28] mm: disallow do_swap_page to handle page faults under VMA lock Suren Baghdasaryan
2022-09-06 19:39   ` Peter Xu
2022-09-06 20:08     ` Suren Baghdasaryan
2022-09-06 20:22       ` Peter Xu
2022-09-07  0:58         ` Suren Baghdasaryan
2022-09-09 14:26   ` Laurent Dufour
2022-09-01 17:35 ` [RFC PATCH RESEND 20/28] mm: introduce per-VMA lock statistics Suren Baghdasaryan
2022-09-09 14:28   ` Laurent Dufour
2022-09-09 16:11     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 21/28] mm: introduce find_and_lock_anon_vma to be used from arch-specific code Suren Baghdasaryan
2022-09-09 14:38   ` Laurent Dufour
2022-09-09 16:10     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 22/28] x86/mm: try VMA lock-based page fault handling first Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 23/28] x86/mm: define ARCH_SUPPORTS_PER_VMA_LOCK Suren Baghdasaryan
2022-09-01 20:20   ` Kent Overstreet
2022-09-01 23:17     ` Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 24/28] arm64/mm: try VMA lock-based page fault handling first Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 25/28] arm64/mm: define ARCH_SUPPORTS_PER_VMA_LOCK Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 26/28] powerc/mm: try VMA lock-based page fault handling first Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 27/28] powerpc/mm: define ARCH_SUPPORTS_PER_VMA_LOCK Suren Baghdasaryan
2022-09-01 17:35 ` [RFC PATCH RESEND 28/28] kernel/fork: throttle call_rcu() calls in vm_area_free Suren Baghdasaryan
2022-09-09 15:19   ` Laurent Dufour
2022-09-09 16:02     ` Suren Baghdasaryan
2022-09-09 16:14       ` Laurent Dufour
2022-09-01 20:58 ` [RFC PATCH RESEND 00/28] per-VMA locks proposal Kent Overstreet
2022-09-01 23:26   ` Suren Baghdasaryan
2022-09-11  9:35     ` Vlastimil Babka
2022-09-28  2:28       ` Suren Baghdasaryan
2022-09-29 11:18         ` Vlastimil Babka
2022-09-02  7:42 ` Peter Zijlstra
2022-09-02 14:45   ` Suren Baghdasaryan
2022-09-05 12:32 ` Michal Hocko
2022-09-05 18:32   ` Suren Baghdasaryan
2022-09-05 20:35     ` Kent Overstreet
2022-09-06 15:46       ` Suren Baghdasaryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c84136d3-703a-0e57-20ce-59f6b5823999@linux.ibm.com \
    --to=ldufour@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=bigeasy@linutronix.de \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dhowells@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jglisse@google.com \
    --cc=joelaf@google.com \
    --cc=kent.overstreet@linux.dev \
    --cc=kernel-team@android.com \
    --cc=laurent.dufour@fr.ibm.com \
    --cc=liam.howlett@oracle.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=luto@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=michel@lespinasse.org \
    --cc=minchan@google.com \
    --cc=paulmck@kernel.org \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rientjes@google.com \
    --cc=songliubraving@fb.com \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).