All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Axel Rasmussen <axelrasmussen@google.com>,
	Nadav Amit <nadav.amit@gmail.com>,
	Jerome Glisse <jglisse@redhat.com>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	Jason Gunthorpe <jgg@ziepe.ca>,
	Alistair Popple <apopple@nvidia.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@redhat.com>,
	peterx@redhat.com, Andrea Arcangeli <aarcange@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Tiberiu Georgescu <tiberiu.georgescu@nutanix.com>,
	Hugh Dickins <hughd@google.com>,
	Miaohe Lin <linmiaohe@huawei.com>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>
Subject: [PATCH v5 21/26] hugetlb/userfaultfd: Handle uffd-wp special pte in hugetlb pf handler
Date: Thu, 15 Jul 2021 16:16:39 -0400	[thread overview]
Message-ID: <20210715201639.211982-1-peterx@redhat.com> (raw)
In-Reply-To: <20210715201422.211004-1-peterx@redhat.com>

Teach the hugetlb page fault code to understand the uffd-wp special pte.  For
example, when seeing such a pte we need to convert any write fault into a read
one (which is fake - we'll retry the write later if so).  Meanwhile, in
handle_userfault() we need to make sure that we also wait on the special swap
pte, just as we do for a none pte.

Note that we also need to teach UFFDIO_COPY about this special pte across the
code path so that we can safely install a new page at this special pte as long
as we know it's a stale entry.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 fs/userfaultfd.c |  5 ++++-
 mm/hugetlb.c     | 26 ++++++++++++++++++++------
 mm/userfaultfd.c |  5 ++++-
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e1c1cbc7bcc8..644df737fbb2 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -245,8 +245,11 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
 	/*
 	 * Lockless access: we're in a wait_event so it's ok if it
 	 * changes under us.
+	 *
+	 * Regarding uffd-wp special case, please refer to comments in
+	 * userfaultfd_must_wait().
 	 */
-	if (huge_pte_none(pte))
+	if (huge_pte_none(pte) || pte_swp_uffd_wp_special(pte))
 		ret = true;
 	if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
 		ret = true;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 517ee30f272c..5941b5cd7ecc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4786,7 +4786,8 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
 static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 			struct vm_area_struct *vma,
 			struct address_space *mapping, pgoff_t idx,
-			unsigned long address, pte_t *ptep, unsigned int flags)
+			unsigned long address, pte_t *ptep,
+			pte_t old_pte, unsigned int flags)
 {
 	struct hstate *h = hstate_vma(vma);
 	vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -4910,7 +4911,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 
 	ptl = huge_pte_lock(h, mm, ptep);
 	ret = 0;
-	if (!huge_pte_none(huge_ptep_get(ptep)))
+	if (!pte_same(huge_ptep_get(ptep), old_pte))
 		goto backout;
 
 	if (anon_rmap) {
@@ -4920,6 +4921,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		page_dup_rmap(page, true);
 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
 				&& (vma->vm_flags & VM_SHARED)));
+	/*
+	 * If this pte was previously wr-protected, keep it wr-protected even
+	 * if populated.
+	 */
+	if (unlikely(pte_swp_uffd_wp_special(old_pte)))
+		new_pte = huge_pte_wrprotect(huge_pte_mkuffd_wp(new_pte));
 	set_huge_pte_at(mm, haddr, ptep, new_pte);
 
 	hugetlb_count_add(pages_per_huge_page(h), mm);
@@ -5035,8 +5042,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	entry = huge_ptep_get(ptep);
-	if (huge_pte_none(entry)) {
-		ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
+	/*
+	 * uffd-wp-special should be handled merely the same as pte none
+	 * because it's basically a none pte with a special marker
+	 */
+	if (huge_pte_none(entry) || pte_swp_uffd_wp_special(entry)) {
+		ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
+				      entry, flags);
 		goto out_mutex;
 	}
 
@@ -5170,7 +5182,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
 	unsigned long size;
 	int vm_shared = dst_vma->vm_flags & VM_SHARED;
-	pte_t _dst_pte;
+	pte_t _dst_pte, cur_pte;
 	spinlock_t *ptl;
 	int ret = -ENOMEM;
 	struct page *page;
@@ -5287,8 +5299,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (idx >= size)
 		goto out_release_unlock;
 
+	cur_pte = huge_ptep_get(dst_pte);
 	ret = -EEXIST;
-	if (!huge_pte_none(huge_ptep_get(dst_pte)))
+	/* Please refer to shmem_mfill_atomic_pte() for uffd-wp special case */
+	if (!huge_pte_none(cur_pte) && !pte_swp_uffd_wp_special(cur_pte))
 		goto out_release_unlock;
 
 	if (vm_shared) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7ba721aca1c5..a8038903effd 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -363,6 +363,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 	}
 
 	while (src_addr < src_start + len) {
+		pte_t pteval;
+
 		BUG_ON(dst_addr >= dst_start + len);
 
 		/*
@@ -385,8 +387,9 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 			goto out_unlock;
 		}
 
+		pteval = huge_ptep_get(dst_pte);
 		if (mode != MCOPY_ATOMIC_CONTINUE &&
-		    !huge_pte_none(huge_ptep_get(dst_pte))) {
+		    !huge_pte_none(pteval) && !pte_swp_uffd_wp_special(pteval)) {
 			err = -EEXIST;
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			i_mmap_unlock_read(mapping);
-- 
2.31.1


  parent reply	other threads:[~2021-07-15 20:17 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-15 20:13 [PATCH v5 00/26] userfaultfd-wp: Support shmem and hugetlbfs Peter Xu
2021-07-15 20:13 ` [PATCH v5 01/26] mm/shmem: Unconditionally set pte dirty in mfill_atomic_install_pte Peter Xu
2021-07-15 20:13 ` [PATCH v5 02/26] shmem/userfaultfd: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2021-07-15 20:13 ` [PATCH v5 03/26] mm: Clear vmf->pte after pte_unmap_same() returns Peter Xu
2021-07-15 20:14 ` [PATCH v5 04/26] mm/userfaultfd: Introduce special pte for unmapped file-backed mem Peter Xu
2021-07-15 20:14 ` [PATCH v5 05/26] mm/swap: Introduce the idea of special swap ptes Peter Xu
2021-07-16  5:50   ` Alistair Popple
2021-07-16 19:11     ` Peter Xu
2021-07-21 11:28       ` Alistair Popple
2021-07-21 21:35         ` Peter Xu
2021-07-22  1:08           ` Alistair Popple
2021-07-22 15:21             ` Peter Xu
2021-07-15 20:14 ` [PATCH v5 06/26] shmem/userfaultfd: Handle uffd-wp special pte in page fault handler Peter Xu
2021-07-15 20:14 ` [PATCH v5 07/26] mm: Drop first_index/last_index in zap_details Peter Xu
2021-07-15 20:14 ` [PATCH v5 08/26] mm: Introduce zap_details.zap_flags Peter Xu
2021-07-15 20:14 ` [PATCH v5 09/26] mm: Introduce ZAP_FLAG_SKIP_SWAP Peter Xu
2021-07-15 20:14 ` [PATCH v5 10/26] shmem/userfaultfd: Persist uffd-wp bit across zapping for file-backed Peter Xu
2021-07-15 20:15 ` [PATCH v5 11/26] shmem/userfaultfd: Allow wr-protect none pte for file-backed mem Peter Xu
2021-07-15 20:16 ` [PATCH v5 12/26] shmem/userfaultfd: Allows file-back mem to be uffd wr-protected on thps Peter Xu
2021-07-15 20:16 ` [PATCH v5 13/26] shmem/userfaultfd: Handle the left-overed special swap ptes Peter Xu
2021-07-15 20:16 ` [PATCH v5 14/26] shmem/userfaultfd: Pass over uffd-wp special swap pte when fork() Peter Xu
2021-07-15 20:16 ` [PATCH v5 15/26] mm/hugetlb: Drop __unmap_hugepage_range definition from hugetlb.h Peter Xu
2021-07-15 20:16 ` [PATCH v5 16/26] mm/hugetlb: Introduce huge pte version of uffd-wp helpers Peter Xu
2021-07-15 20:16 ` [PATCH v5 17/26] hugetlb/userfaultfd: Hook page faults for uffd write protection Peter Xu
2021-07-20 15:37   ` kernel test robot
2021-07-20 15:37     ` kernel test robot
2021-07-21 21:50     ` Peter Xu
2021-07-21 21:50       ` Peter Xu
2021-07-15 20:16 ` [PATCH v5 18/26] hugetlb/userfaultfd: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2021-07-20 23:59   ` kernel test robot
2021-07-20 23:59     ` kernel test robot
2021-07-15 20:16 ` [PATCH v5 19/26] hugetlb/userfaultfd: Handle UFFDIO_WRITEPROTECT Peter Xu
2021-07-21  8:24   ` kernel test robot
2021-07-21  8:24     ` kernel test robot
2021-07-15 20:16 ` [PATCH v5 20/26] mm/hugetlb: Introduce huge version of special swap pte helpers Peter Xu
2021-07-15 20:16 ` Peter Xu [this message]
2021-07-15 20:16 ` [PATCH v5 22/26] hugetlb/userfaultfd: Allow wr-protect none ptes Peter Xu
2021-07-15 20:16 ` [PATCH v5 23/26] hugetlb/userfaultfd: Only drop uffd-wp special pte if required Peter Xu
2021-07-15 20:16 ` [PATCH v5 24/26] mm/pagemap: Recognize uffd-wp bit for shmem/hugetlbfs Peter Xu
2021-07-19  9:53   ` Tiberiu Georgescu
2021-07-19 16:03     ` Peter Xu
2021-07-19 17:23       ` Tiberiu Georgescu
2021-07-19 17:56         ` Peter Xu
2021-07-21 14:38           ` Ivan Teterevkov
2021-07-21 16:19             ` David Hildenbrand
2021-07-21 19:54               ` Ivan Teterevkov
2021-07-21 22:28                 ` Peter Xu
2021-07-21 22:57                   ` Peter Xu
2021-07-22  6:27                     ` David Hildenbrand
2021-07-22 16:08                       ` Peter Xu
2021-07-15 20:16 ` [PATCH v5 25/26] mm/userfaultfd: Enable write protection for shmem & hugetlbfs Peter Xu
2021-07-15 20:16 ` [PATCH v5 26/26] userfaultfd/selftests: Enable uffd-wp for shmem/hugetlbfs Peter Xu
2021-07-19 19:21 ` [PATCH v5 00/26] userfaultfd-wp: Support shmem and hugetlbfs David Hildenbrand
2021-07-19 20:12   ` Peter Xu
2021-07-22 18:30 ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210715201639.211982-1-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=axelrasmussen@google.com \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=jgg@ziepe.ca \
    --cc=jglisse@redhat.com \
    --cc=kirill@shutemov.name \
    --cc=linmiaohe@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=nadav.amit@gmail.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=tiberiu.georgescu@nutanix.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.