From: Michel Lespinasse <michel@lespinasse.org>
To: Linux-MM <linux-mm@kvack.org>,
	Linux-Kernel <linux-kernel@vger.kernel.org>
Cc: Laurent Dufour <ldufour@linux.ibm.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Michal Hocko <mhocko@suse.com>,
	Matthew Wilcox <willy@infradead.org>,
	Rik van Riel <riel@surriel.com>,
	Paul McKenney <paulmck@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Joel Fernandes <joelaf@google.com>,
	Andy Lutomirski <luto@kernel.org>,
	Michel Lespinasse <michel@lespinasse.org>
Subject: [PATCH 23/29] mm: implement speculative handling in do_swap_page()
Date: Fri, 30 Apr 2021 12:52:24 -0700
Message-ID: <20210430195232.30491-24-michel@lespinasse.org>
In-Reply-To: <20210430195232.30491-1-michel@lespinasse.org>

If the pte is larger than an unsigned long, use pte_spinlock() to lock the
page table when verifying the pte - pte_spinlock() is necessary to ensure
the page table is still valid when we take its lock.
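
A minimal sketch of that check-under-lock sequence, mirroring the hunk
below (on i386 with PAE, for example, pte_t is wider than unsigned long,
so a non-atomic read can observe a mix of old and new halves):

	if (sizeof(pte_t) > sizeof(unsigned long)) {
		if (!pte_spinlock(vmf))
			return VM_FAULT_RETRY;	/* speculation failed, retry the fault */
		if (!pte_same(*vmf->pte, vmf->orig_pte))
			goto unlock;		/* pte changed under us */
		spin_unlock(vmf->ptl);
	}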

Abort speculative faults if the pte is not a swap entry, or if the desired
page is not found in the swap cache, to keep things as simple as possible.
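
Roughly, these two speculative bail-outs look like this (simplified from
the hunks below; the real code also clears the swapin delayacct flag
before returning in the swap-cache-miss case):

	entry = pte_to_swp_entry(vmf->orig_pte);
	if (unlikely(non_swap_entry(entry)) &&
	    (vmf->flags & FAULT_FLAG_SPECULATIVE))
		ret = VM_FAULT_RETRY;	/* migration/device entries: bail out */

	if (!page && (vmf->flags & FAULT_FLAG_SPECULATIVE))
		return VM_FAULT_RETRY;	/* not in swap cache: bail out */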

Only use trylock when locking the swapped-in page - again to keep things
simple, and also because the usual lock_page_or_retry() would otherwise try
to release the mmap lock, which is not held in the speculative case.
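
The locking step then looks roughly like this (as in the hunk below;
lock_page_or_retry() may drop the mmap lock and sleep, neither of which is
valid here since the speculative path never acquired that lock):

	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
		locked = trylock_page(page);	/* never touches mmap_lock */
	else
		locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
	if (!locked) {
		ret |= VM_FAULT_RETRY;
		goto out_release;
	}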

Use pte_map_lock() to ensure proper synchronization when finally committing
the faulted page to the mm address space.
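
That final commit step is sketched below, mirroring the last hunk:

	if (!pte_map_lock(vmf)) {
		ret = VM_FAULT_RETRY;	/* vma changed, retry the fault */
		goto out_page;
	}
	if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
		goto out_nomap;		/* somebody else faulted it in first */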

Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
---
 mm/memory.c | 74 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index c3cd29d3acc6..a3708b4a616c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2654,30 +2654,6 @@ bool __pte_map_lock(struct vm_fault *vmf)
 
 #endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */
 
-/*
- * handle_pte_fault chooses page fault handler according to an entry which was
- * read non-atomically.  Before making any commitment, on those architectures
- * or configurations (e.g. i386 with PAE) which might give a mix of unmatched
- * parts, do_swap_page must check under lock before unmapping the pte and
- * proceeding (but do_wp_page is only called after already making such a check;
- * and do_anonymous_page can safely check later on).
- */
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
-				pte_t *page_table, pte_t orig_pte)
-{
-	int same = 1;
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
-	if (sizeof(pte_t) > sizeof(unsigned long)) {
-		spinlock_t *ptl = pte_lockptr(mm, pmd);
-		spin_lock(ptl);
-		same = pte_same(*page_table, orig_pte);
-		spin_unlock(ptl);
-	}
-#endif
-	pte_unmap(page_table);
-	return same;
-}
-
 static inline bool cow_user_page(struct page *dst, struct page *src,
 				 struct vm_fault *vmf)
 {
@@ -3386,12 +3362,34 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		return VM_FAULT_RETRY;
 	}
 
-	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
-		goto out;
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
+	if (sizeof(pte_t) > sizeof(unsigned long)) {
+		/*
+		 * vmf->orig_pte was read non-atomically. Before making
+		 * any commitment, on those architectures or configurations
+		 * (e.g. i386 with PAE) which might give a mix of
+		 * unmatched parts, we must check under lock before
+		 * unmapping the pte and proceeding.
+		 *
+		 * (but do_wp_page is only called after already making
+		 * such a check; and do_anonymous_page can safely
+		 * check later on).
+		 */
+		if (!pte_spinlock(vmf))
+			return VM_FAULT_RETRY;
+		if (!pte_same(*vmf->pte, vmf->orig_pte))
+			goto unlock;
+		spin_unlock(vmf->ptl);
+	}
+#endif
+	pte_unmap(vmf->pte);
+	vmf->pte = NULL;
 
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
-		if (is_migration_entry(entry)) {
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			ret = VM_FAULT_RETRY;
+		} else if (is_migration_entry(entry)) {
 			migration_entry_wait(vma->vm_mm, vmf->pmd,
 					     vmf->address);
 		} else if (is_device_private_entry(entry)) {
@@ -3412,8 +3410,14 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	swapcache = page;
 
 	if (!page) {
-		struct swap_info_struct *si = swp_swap_info(entry);
+		struct swap_info_struct *si;
 
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+			return VM_FAULT_RETRY;
+		}
+
+		si = swp_swap_info(entry);
 		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
 		    __swap_count(entry) == 1) {
 			/* skip swapcache */
@@ -3476,7 +3480,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		goto out_release;
 	}
 
-	locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		locked = trylock_page(page);
+	else
+		locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
 
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!locked) {
@@ -3504,10 +3511,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	cgroup_throttle_swaprate(page, GFP_KERNEL);
 
 	/*
-	 * Back out if somebody else already faulted in this pte.
+	 * Back out if the VMA has changed behind our back during a speculative
+	 * page fault or if somebody else already faulted in this pte.
 	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto out_page;
+	}
 	if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
 		goto out_nomap;
 
-- 
2.20.1

