From: Peter Xu <peterx@redhat.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Nadav Amit <nadav.amit@gmail.com>,
	peterx@redhat.com, Alistair Popple <apopple@nvidia.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Matthew Wilcox <willy@infradead.org>,
	Jerome Glisse <jglisse@redhat.com>,
	Axel Rasmussen <axelrasmussen@google.com>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	David Hildenbrand <david@redhat.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Hugh Dickins <hughd@google.com>
Subject: [PATCH v6 18/23] mm/hugetlb: Handle uffd-wp during fork()
Date: Mon, 15 Nov 2021 16:02:56 +0800
Message-ID: <20211115080256.75095-1-peterx@redhat.com>
In-Reply-To: <20211115075522.73795-1-peterx@redhat.com>

First, we need to pass dst_vma into copy_hugetlb_page_range(), because
for uffd-wp it is the destination vma that decides how uffd-wp
protected ptes should be treated during fork().

We also need to recognize pte markers during fork() and copy the marker
ptes only when needed (see the sketch below).
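In short, the uffd-wp handling added below boils down to the following
(a simplified sketch only; the helper and variable names are the ones
used in the diff, and the present-pte path merely has its vma arguments
switched to the right side):

	/*
	 * Case 1: pte markers are copied only into uffd-wp enabled
	 * child vmas, otherwise they are simply skipped.
	 */
	if (is_pte_marker(entry)) {
		if (userfaultfd_wp(dst_vma))
			set_huge_pte_at(dst, addr, dst_pte, entry);
	}

	/*
	 * Case 2: for migration/hwpoison swap entries, drop the
	 * uffd-wp bit when the child vma is no longer registered
	 * for write protection.
	 */
	if (!userfaultfd_wp(dst_vma) && huge_pte_uffd_wp(entry))
		entry = huge_pte_clear_uffd_wp(entry);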

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/hugetlb.h |  7 +++++--
 mm/hugetlb.c            | 41 +++++++++++++++++++++++++++--------------
 mm/memory.c             |  2 +-
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4c3ea7ee8ce8..6935b02f1081 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -137,7 +137,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 			     struct vm_area_struct *new_vma,
 			     unsigned long old_addr, unsigned long new_addr,
 			     unsigned long len);
-int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
+int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
+			    struct vm_area_struct *, struct vm_area_struct *);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			 struct page **, struct vm_area_struct **,
 			 unsigned long *, unsigned long *, long, unsigned int,
@@ -268,7 +269,9 @@ static inline struct page *follow_huge_addr(struct mm_struct *mm,
 }
 
 static inline int copy_hugetlb_page_range(struct mm_struct *dst,
-			struct mm_struct *src, struct vm_area_struct *vma)
+					  struct mm_struct *src,
+					  struct vm_area_struct *dst_vma,
+					  struct vm_area_struct *src_vma)
 {
 	BUG();
 	return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 16fb9cd8d9c5..cf9a0e8c32ba 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4690,23 +4690,24 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
 }
 
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
-			    struct vm_area_struct *vma)
+			    struct vm_area_struct *dst_vma,
+			    struct vm_area_struct *src_vma)
 {
 	pte_t *src_pte, *dst_pte, entry, dst_entry;
 	struct page *ptepage;
 	unsigned long addr;
-	bool cow = is_cow_mapping(vma->vm_flags);
-	struct hstate *h = hstate_vma(vma);
+	bool cow = is_cow_mapping(src_vma->vm_flags);
+	struct hstate *h = hstate_vma(src_vma);
 	unsigned long sz = huge_page_size(h);
 	unsigned long npages = pages_per_huge_page(h);
-	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct address_space *mapping = src_vma->vm_file->f_mapping;
 	struct mmu_notifier_range range;
 	int ret = 0;
 
 	if (cow) {
-		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
-					vma->vm_start,
-					vma->vm_end);
+		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_vma, src,
+					src_vma->vm_start,
+					src_vma->vm_end);
 		mmu_notifier_invalidate_range_start(&range);
 	} else {
 		/*
@@ -4718,12 +4719,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		i_mmap_lock_read(mapping);
 	}
 
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
+	for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
 		spinlock_t *src_ptl, *dst_ptl;
 		src_pte = huge_pte_offset(src, addr, sz);
 		if (!src_pte)
 			continue;
-		dst_pte = huge_pte_alloc(dst, vma, addr, sz);
+		dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
 		if (!dst_pte) {
 			ret = -ENOMEM;
 			break;
@@ -4758,6 +4759,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		} else if (unlikely(is_hugetlb_entry_migration(entry) ||
 				    is_hugetlb_entry_hwpoisoned(entry))) {
 			swp_entry_t swp_entry = pte_to_swp_entry(entry);
+			bool uffd_wp = huge_pte_uffd_wp(entry);
 
 			if (is_writable_migration_entry(swp_entry) && cow) {
 				/*
@@ -4767,10 +4769,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				swp_entry = make_readable_migration_entry(
 							swp_offset(swp_entry));
 				entry = swp_entry_to_pte(swp_entry);
+				if (userfaultfd_wp(src_vma) && uffd_wp)
+					entry = huge_pte_mkuffd_wp(entry);
 				set_huge_swap_pte_at(src, addr, src_pte,
 						     entry, sz);
 			}
+			if (!userfaultfd_wp(dst_vma) && uffd_wp)
+				entry = huge_pte_clear_uffd_wp(entry);
 			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
+		} else if (unlikely(is_pte_marker(entry))) {
+			/*
+			 * We copy the pte marker only if the dst vma has
+			 * uffd-wp enabled.
+			 */
+			if (userfaultfd_wp(dst_vma))
+				set_huge_pte_at(dst, addr, dst_pte, entry);
 		} else {
 			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
@@ -4785,20 +4798,20 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			 * need to be without the pgtable locks since we could
 			 * sleep during the process.
 			 */
-			if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+			if (unlikely(page_needs_cow_for_dma(src_vma, ptepage))) {
 				pte_t src_pte_old = entry;
 				struct page *new;
 
 				spin_unlock(src_ptl);
 				spin_unlock(dst_ptl);
 				/* Do not use reserve as it's private owned */
-				new = alloc_huge_page(vma, addr, 1);
+				new = alloc_huge_page(dst_vma, addr, 1);
 				if (IS_ERR(new)) {
 					put_page(ptepage);
 					ret = PTR_ERR(new);
 					break;
 				}
-				copy_user_huge_page(new, ptepage, addr, vma,
+				copy_user_huge_page(new, ptepage, addr, dst_vma,
 						    npages);
 				put_page(ptepage);
 
@@ -4808,13 +4821,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 				entry = huge_ptep_get(src_pte);
 				if (!pte_same(src_pte_old, entry)) {
-					restore_reserve_on_error(h, vma, addr,
+					restore_reserve_on_error(h, dst_vma, addr,
 								new);
 					put_page(new);
 					/* dst_entry won't change as in child */
 					goto again;
 				}
-				hugetlb_install_page(vma, dst_pte, addr, new);
+				hugetlb_install_page(dst_vma, dst_pte, addr, new);
 				spin_unlock(src_ptl);
 				spin_unlock(dst_ptl);
 				continue;
diff --git a/mm/memory.c b/mm/memory.c
index 69a73d47513b..89715d1ec956 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1284,7 +1284,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 		return 0;
 
 	if (is_vm_hugetlb_page(src_vma))
-		return copy_hugetlb_page_range(dst_mm, src_mm, src_vma);
+		return copy_hugetlb_page_range(dst_mm, src_mm, dst_vma, src_vma);
 
 	if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
 		/*
-- 
2.32.0

