From: Michel Lespinasse <michel@lespinasse.org>
To: kernel test robot <lkp@intel.com>
Cc: Michel Lespinasse <michel@lespinasse.org>,
	Linux-MM <linux-mm@kvack.org>,
	linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	llvm@lists.linux.dev, kbuild-all@lists.01.org,
	kernel-team@fb.com, Laurent Dufour <ldufour@linux.ibm.com>,
	Jerome Glisse <jglisse@google.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Michal Hocko <mhocko@suse.com>, Vlastimil Babka <vbabka@suse.cz>,
	Davidlohr Bueso <dave@stgolabs.net>
Subject: Re: [PATCH v2 18/35] mm: implement speculative handling in do_anonymous_page()
Date: Fri, 28 Jan 2022 14:08:46 -0800
Message-ID: <20220128220846.GA18863@lespinasse.org>
In-Reply-To: <202201290445.uKuWeLmf-lkp@intel.com>

On Sat, Jan 29, 2022 at 05:03:53AM +0800, kernel test robot wrote:
> >> mm/memory.c:3876:20: warning: variable 'vmf' is uninitialized when used within its own initialization [-Wuninitialized]
>            if (!pte_map_lock(vmf)) {
>                 ~~~~~~~~~~~~~^~~~
>    include/linux/mm.h:3418:25: note: expanded from macro 'pte_map_lock'
>            struct vm_fault *vmf = __vmf;                                   \
>                             ~~~   ^~~~~
>    1 warning generated.

Ah, that's interesting - this compiles cleanly with gcc, but clang catches
the self-initialization.

The following amended patch should fix this:
(the only change is extra underscores in the pte_map_lock and pte_spinlock
macros, so that the macro-local variable no longer collides with the
caller's 'vmf')
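
To see why clang complains, here is a reduced illustration of the expansion,
as a standalone example (not code from the tree):

#include <stdbool.h>

struct vm_fault { int dummy; };

/* Original form: macro parameter __vmf, macro-local variable vmf. */
#define pte_map_lock(__vmf)						\
({									\
	struct vm_fault *vmf = __vmf;					\
	/* ... map and lock the pte through vmf ... */			\
	true;								\
})

bool handler(struct vm_fault *vmf)
{
	/*
	 * The macro argument is substituted textually, so the body
	 * expands to:
	 *
	 *	struct vm_fault *vmf = vmf;
	 *
	 * The new block-scope vmf shadows the function parameter and is
	 * initialized from itself. gcc accepts this silently (it only
	 * warns with -Winit-self), while clang diagnoses it under
	 * -Wuninitialized. Renaming the macro-local variables avoids
	 * the collision.
	 */
	return pte_map_lock(vmf);
}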

------------------------------------ 8< ---------------------------------

mm: add pte_map_lock() and pte_spinlock()

pte_map_lock() and pte_spinlock() are used by fault handlers to ensure
the pte is mapped and locked before they commit the faulted page to the
mm's address space at the end of the fault.

The functions differ in their preconditions; pte_map_lock() expects
the pte to be unmapped prior to the call, while pte_spinlock() expects
it to be already mapped.
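
To illustrate the contract (a simplified sketch, not an actual hunk from
this series):

	/* pte_map_lock() requires vmf->pte to be NULL on entry. */
	if (!pte_map_lock(vmf))
		return VM_FAULT_RETRY;	/* can only fail for speculative faults */
	/* vmf->pte is now mapped and vmf->ptl is held; install the page. */
	pte_unmap_unlock(vmf->pte, vmf->ptl);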

In the speculative fault case, the functions verify, after locking the pte,
that the mmap sequence count has not changed since the start of the fault,
and thus that no mmap lock writers have been running concurrently with
the fault. After that point the page table lock serializes any further
races with concurrent mmap lock writers.
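
For context, mmap_seq_read_check() was added earlier in this series (per-mm
mmap sequence counter); it is roughly of this shape - simplified here, with
the exact field name to be taken as illustrative:

static inline bool mmap_seq_read_check(struct mm_struct *mm, unsigned long seq)
{
	/* Order the preceding page table reads before re-reading the count. */
	smp_rmb();
	return seq == READ_ONCE(mm->mmap_seq);
}

The count only changes when an mmap lock writer runs, which is what makes
this check sufficient.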

If the mmap sequence count check fails, both functions will return false
with the pte being left unmapped and unlocked.

Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
---
 include/linux/mm.h | 38 ++++++++++++++++++++++++++
 mm/memory.c        | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2e2122bd3da3..80894db6f01a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3394,5 +3394,43 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
 }
 #endif
 
+#ifdef CONFIG_MMU
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+
+bool __pte_map_lock(struct vm_fault *vmf);
+
+static inline bool pte_map_lock(struct vm_fault *vmf)
+{
+	VM_BUG_ON(vmf->pte);
+	return __pte_map_lock(vmf);
+}
+
+static inline bool pte_spinlock(struct vm_fault *vmf)
+{
+	VM_BUG_ON(!vmf->pte);
+	return __pte_map_lock(vmf);
+}
+
+#else	/* !CONFIG_SPECULATIVE_PAGE_FAULT */
+
+#define pte_map_lock(____vmf)						\
+({									\
+	struct vm_fault *__vmf = ____vmf;				\
+	__vmf->pte = pte_offset_map_lock(__vmf->vma->vm_mm, __vmf->pmd,	\
+					 __vmf->address, &__vmf->ptl);	\
+	true;								\
+})
+
+#define pte_spinlock(____vmf)						\
+({									\
+	struct vm_fault *__vmf = ____vmf;				\
+	__vmf->ptl = pte_lockptr(__vmf->vma->vm_mm, __vmf->pmd);	\
+	spin_lock(__vmf->ptl);						\
+	true;								\
+})
+
+#endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */
+#endif	/* CONFIG_MMU */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/memory.c b/mm/memory.c
index d0db10bd5bee..1ce837e47395 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2745,6 +2745,72 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
 #define speculative_page_walk_end()   local_irq_enable()
 #endif
 
+bool __pte_map_lock(struct vm_fault *vmf)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	pmd_t pmdval;
+#endif
+	pte_t *pte = vmf->pte;
+	spinlock_t *ptl;
+
+	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+		vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
+		if (!pte)
+			vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
+		spin_lock(vmf->ptl);
+		return true;
+	}
+
+	speculative_page_walk_begin();
+	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq))
+		goto fail;
+	/*
+	 * The mmap sequence count check guarantees that the page
+	 * tables are still valid at that point, and
+	 * speculative_page_walk_begin() ensures that they stay around.
+	 */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/*
+	 * We check that the pmd value is still the same, to ensure that
+	 * no huge page collapse operation is in progress behind our back.
+	 */
+	pmdval = READ_ONCE(*vmf->pmd);
+	if (!pmd_same(pmdval, vmf->orig_pmd))
+		goto fail;
+#endif
+	ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
+	if (!pte)
+		pte = pte_offset_map(vmf->pmd, vmf->address);
+	/*
+	 * Try locking the page table.
+	 *
+	 * Note that we might race against zap_pte_range() which
+	 * invalidates TLBs while holding the page table lock.
+	 * We are still under the speculative_page_walk_begin() section,
+	 * and zap_pte_range() could thus deadlock with us if we tried
+	 * using spin_lock() here.
+	 *
+	 * We also don't want to loop on spin_trylock() until it succeeds,
+	 * as that could starve us against a steady stream of lockers.
+	 */
+	if (unlikely(!spin_trylock(ptl)))
+		goto fail;
+	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq))
+		goto unlock_fail;
+	speculative_page_walk_end();
+	vmf->pte = pte;
+	vmf->ptl = ptl;
+	return true;
+
+unlock_fail:
+	spin_unlock(ptl);
+fail:
+	if (pte)
+		pte_unmap(pte);
+	speculative_page_walk_end();
+	return false;
+}
+
 #endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */
 
 /*
-- 
2.20.1
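
For reference, the call site that triggered the warning (mm/memory.c:3876 in
do_anonymous_page()) is of this form - sketched from the warning output and
the series' retry convention, with the error label being illustrative:

	if (!pte_map_lock(vmf)) {
		ret = VM_FAULT_RETRY;
		goto release;
	}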


Thread overview: 77+ messages

2022-01-28 13:09 [PATCH v2 00/35] Speculative page faults Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 01/35] mm: export dump_mm Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 02/35] mmap locking API: mmap_lock_is_contended returns a bool Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 03/35] mmap locking API: name the return values Michel Lespinasse
2022-01-31 16:17   ` Liam Howlett
2022-02-07 17:39     ` Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 04/35] do_anonymous_page: use update_mmu_tlb() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 05/35] do_anonymous_page: reduce code duplication Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 06/35] mm: introduce CONFIG_SPECULATIVE_PAGE_FAULT Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 07/35] x86/mm: define ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 08/35] mm: add FAULT_FLAG_SPECULATIVE flag Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 09/35] mm: add do_handle_mm_fault() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 10/35] mm: add per-mm mmap sequence counter for speculative page fault handling Michel Lespinasse
2022-08-25 11:23   ` Pavan Kondeti
2022-01-28 13:09 ` [PATCH v2 11/35] mm: rcu safe vma freeing Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 12/35] mm: separate mmap locked assertion from find_vma Michel Lespinasse
2022-01-29  0:08   ` kernel test robot
2022-01-29  0:08     ` kernel test robot
2022-01-29  0:33     ` Michel Lespinasse
2022-01-29  0:33       ` Michel Lespinasse
2022-01-31 14:44   ` Matthew Wilcox
2022-02-04 22:41     ` Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 13/35] x86/mm: attempt speculative mm faults first Michel Lespinasse
2022-02-01 17:16   ` Liam Howlett
2022-02-07 17:39     ` Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 14/35] mm: add speculative_page_walk_begin() and speculative_page_walk_end() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 15/35] mm: refactor __handle_mm_fault() / handle_pte_fault() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 16/35] mm: implement speculative handling in __handle_mm_fault() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 17/35] mm: add pte_map_lock() and pte_spinlock() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 18/35] mm: implement speculative handling in do_anonymous_page() Michel Lespinasse
2022-01-28 21:03   ` kernel test robot
2022-01-28 21:03     ` kernel test robot
2022-01-28 22:08     ` Michel Lespinasse [this message]
2022-01-28 22:08       ` Michel Lespinasse
2022-01-30  2:54   ` [mm] fa5331bae2: canonical_address#:#[##] kernel test robot
2022-01-30  2:54     ` kernel test robot
2022-01-30  5:08     ` Michel Lespinasse
2022-01-30  5:08       ` Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 19/35] mm: enable speculative fault handling through do_anonymous_page() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 20/35] mm: implement speculative handling in do_numa_page() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 21/35] mm: enable speculative fault " Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 22/35] percpu-rwsem: enable percpu_sem destruction in atomic context Michel Lespinasse
2022-01-29 12:13   ` Hillf Danton
2022-01-31 18:04     ` Suren Baghdasaryan
2022-02-01  2:09       ` Hillf Danton
2022-02-07 19:31         ` Suren Baghdasaryan
2022-02-08  0:20           ` Hillf Danton
2022-02-08  1:31             ` Suren Baghdasaryan
2022-01-28 13:09 ` [PATCH v2 23/35] mm: add mmu_notifier_lock Michel Lespinasse
2022-07-27  7:34   ` Pavan Kondeti
2022-07-27 20:30     ` Suren Baghdasaryan
2022-01-28 13:09 ` [PATCH v2 24/35] mm: write lock mmu_notifier_lock when registering mmu notifiers Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 25/35] mm: add mmu_notifier_trylock() and mmu_notifier_unlock() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 26/35] mm: implement speculative handling in wp_page_copy() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 27/35] mm: implement and enable speculative fault handling in handle_pte_fault() Michel Lespinasse
2022-01-28 13:09 ` [PATCH v2 28/35] mm: disable speculative faults for single threaded user space Michel Lespinasse
2022-01-28 13:10 ` [PATCH v2 29/35] mm: disable rcu safe vma freeing " Michel Lespinasse
2022-01-28 13:10 ` [PATCH v2 30/35] mm: create new include/linux/vm_event.h header file Michel Lespinasse
2022-01-28 13:10 ` [PATCH v2 31/35] mm: anon spf statistics Michel Lespinasse
2022-01-28 13:10 ` [PATCH v2 32/35] arm64/mm: define ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT Michel Lespinasse
2022-01-28 13:10 ` [PATCH v2 33/35] arm64/mm: attempt speculative mm faults first Michel Lespinasse
2022-01-30  9:13   ` Mike Rapoport
2022-01-31  8:07     ` Michel Lespinasse
2022-02-01  8:58       ` Mike Rapoport
2022-02-07 17:39         ` Michel Lespinasse
2022-02-08  9:07           ` Mike Rapoport
2022-01-28 13:10 ` [PATCH v2 34/35] powerpc/mm: define ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT Michel Lespinasse
2022-01-28 13:10 ` [PATCH v2 35/35] powerpc/mm: attempt speculative mm faults first Michel Lespinasse
2022-01-31  9:56 ` [PATCH v2 00/35] Speculative page faults David Hildenbrand
2022-01-31 17:00   ` Suren Baghdasaryan
2022-02-01  1:14 ` Andrew Morton
2022-02-01  2:20   ` Matthew Wilcox
2022-02-07 17:39     ` Michel Lespinasse
2022-02-01 17:17   ` Sebastian Andrzej Siewior
2022-02-23 16:11 ` Mel Gorman
2022-03-08  5:37   ` Suren Baghdasaryan
2022-01-30 18:03 [PATCH v2 18/35] mm: implement speculative handling in do_anonymous_page() kernel test robot
