All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Hugh Dickins <hughd@google.com>, Maya Gokhale <gokhale2@llnl.gov>,
	Jerome Glisse <jglisse@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	peterx@redhat.com, Martin Cracauer <cracauer@cons.org>,
	Denis Plotnikov <dplotnikov@virtuozzo.com>,
	Shaohua Li <shli@fb.com>, Andrea Arcangeli <aarcange@redhat.com>,
	Pavel Emelyanov <xemul@parallels.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Marty McFadden <mcfadden8@llnl.gov>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Mel Gorman <mgorman@suse.de>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>
Subject: [PATCH RFC 19/24] userfaultfd: wp: support swap and page migration
Date: Mon, 21 Jan 2019 15:57:17 +0800	[thread overview]
Message-ID: <20190121075722.7945-20-peterx@redhat.com> (raw)
In-Reply-To: <20190121075722.7945-1-peterx@redhat.com>

For either swap and page migration, we all use the bit 2 of the entry to
identify whether this entry is uffd write-protected.  It plays a similar
role as the existing soft dirty bit in swap entries but only for keeping
the uffd-wp tracking for a specific PTE/PMD.

Something special here is that when we want to recover the uffd-wp bit
from a swap/migration entry to the PTE bit we'll also need to take care
of the _PAGE_RW bit and make sure it's cleared, otherwise even with the
_PAGE_UFFD_WP bit we can't trap it at all.

Note that this patch removed two lines from "userfaultfd: wp: hook
userfault handler to write protection fault" where we try to remove the
VM_FAULT_WRITE from vmf->flags when uffd-wp is set for the VMA.  This
patch will still keep the write flag there.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/swapops.h | 2 ++
 mm/huge_memory.c        | 3 +++
 mm/memory.c             | 8 ++++++--
 mm/migrate.c            | 7 +++++++
 mm/mprotect.c           | 2 ++
 mm/rmap.c               | 6 ++++++
 6 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4d961668e5fc..0c2923b1cdb7 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 
 	if (pte_swp_soft_dirty(pte))
 		pte = pte_swp_clear_soft_dirty(pte);
+	if (pte_swp_uffd_wp(pte))
+		pte = pte_swp_clear_uffd_wp(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a3ec62e83b6..682f1427da1a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2171,6 +2171,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		write = is_write_migration_entry(entry);
 		young = false;
 		soft_dirty = pmd_swp_soft_dirty(old_pmd);
+		uffd_wp = pmd_swp_uffd_wp(old_pmd);
 	} else {
 		page = pmd_page(old_pmd);
 		if (pmd_dirty(old_pmd))
@@ -2203,6 +2204,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = swp_entry_to_pte(swp_entry);
 			if (soft_dirty)
 				entry = pte_swp_mksoft_dirty(entry);
+			if (uffd_wp)
+				entry = pte_swp_mkuffd_wp(entry);
 		} else {
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 			entry = maybe_mkwrite(entry, vma);
diff --git a/mm/memory.c b/mm/memory.c
index f5497752d2a3..ac7d659e40fe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -736,6 +736,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 				pte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(*src_pte))
 					pte = pte_swp_mksoft_dirty(pte);
+				if (pte_swp_uffd_wp(*src_pte))
+					pte = pte_swp_mkuffd_wp(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		} else if (is_device_private_entry(entry)) {
@@ -2814,8 +2816,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if (userfaultfd_wp(vma))
-		vmf->flags &= ~FAULT_FLAG_WRITE;
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -2825,6 +2825,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(vmf->orig_pte))
 		pte = pte_mksoft_dirty(pte);
+	if (pte_swp_uffd_wp(vmf->orig_pte)) {
+		pte = pte_mkuffd_wp(pte);
+		pte = pte_wrprotect(pte);
+	}
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 	vmf->orig_pte = pte;
diff --git a/mm/migrate.c b/mm/migrate.c
index f7e4bfdc13b7..963d3dd65cf0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -242,6 +242,11 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
 
+		if (pte_swp_uffd_wp(*pvmw.pte)) {
+			pte = pte_mkuffd_wp(pte);
+			pte = pte_wrprotect(pte);
+		}
+
 		if (unlikely(is_zone_device_page(new))) {
 			if (is_device_private_page(new)) {
 				entry = make_device_private_entry(new, pte_write(pte));
@@ -2265,6 +2270,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pte))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pte))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, addr, ptep, swp_pte);
 
 			/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c37c9aa7a54e..2ce62d806108 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -187,6 +187,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				newpte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(oldpte))
 					newpte = pte_swp_mksoft_dirty(newpte);
+				if (pte_swp_uffd_wp(oldpte))
+					newpte = pte_swp_mkuffd_wp(newpte);
 				set_pte_at(mm, addr, pte, newpte);
 
 				pages++;
diff --git a/mm/rmap.c b/mm/rmap.c
index 85b7f9423352..e1cf191db4f3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1463,6 +1463,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1555,6 +1557,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1621,6 +1625,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/* Invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
-- 
2.17.1


  parent reply	other threads:[~2019-01-21  8:00 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-21  7:56 [PATCH RFC 00/24] userfaultfd: write protection support Peter Xu
2019-01-21  7:56 ` [PATCH RFC 01/24] mm: gup: rename "nonblocking" to "locked" where proper Peter Xu
2019-01-21 10:20   ` Mike Rapoport
2019-01-21  7:57 ` [PATCH RFC 02/24] mm: userfault: return VM_FAULT_RETRY on signals Peter Xu
2019-01-21 15:40   ` Jerome Glisse
2019-01-22  6:10     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 03/24] mm: allow VM_FAULT_RETRY for multiple times Peter Xu
2019-01-21 15:55   ` Jerome Glisse
2019-01-22  8:22     ` Peter Xu
2019-01-22 16:53       ` Jerome Glisse
2019-01-23  2:12         ` Peter Xu
2019-01-23  2:39           ` Jerome Glisse
2019-01-24  5:45             ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 04/24] mm: gup: " Peter Xu
2019-01-21 16:24   ` Jerome Glisse
2019-01-24  7:05     ` Peter Xu
2019-01-24 15:34       ` Jerome Glisse
2019-01-25  2:49         ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 05/24] userfaultfd: wp: add helper for writeprotect check Peter Xu
2019-01-21 10:23   ` Mike Rapoport
2019-01-22  8:31     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 06/24] userfaultfd: wp: support write protection for userfault vma range Peter Xu
2019-01-21 10:20   ` Mike Rapoport
2019-01-22  8:55     ` Peter Xu
2019-01-21 14:05   ` Jerome Glisse
2019-01-22  9:39     ` Peter Xu
2019-01-22 17:02       ` Jerome Glisse
2019-01-23  2:17         ` Peter Xu
2019-01-23  2:43           ` Jerome Glisse
2019-01-24  5:47             ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 07/24] userfaultfd: wp: add the writeprotect API to userfaultfd ioctl Peter Xu
2019-01-21 10:42   ` Mike Rapoport
2019-01-24  4:56     ` Peter Xu
2019-01-24  7:27       ` Mike Rapoport
2019-01-24  9:28         ` Peter Xu
2019-01-25  7:54           ` Mike Rapoport
2019-01-25 10:12             ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 08/24] userfaultfd: wp: hook userfault handler to write protection fault Peter Xu
2019-01-21  7:57 ` [PATCH RFC 09/24] userfaultfd: wp: enabled write protection in userfaultfd API Peter Xu
2019-01-21  7:57 ` [PATCH RFC 10/24] userfaultfd: wp: add WP pagetable tracking to x86 Peter Xu
2019-01-21 15:09   ` Jerome Glisse
2019-01-24  5:16     ` Peter Xu
2019-01-24 15:40       ` Jerome Glisse
2019-01-25  3:30         ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 11/24] userfaultfd: wp: userfaultfd_pte/huge_pmd_wp() helpers Peter Xu
2019-01-21  7:57 ` [PATCH RFC 12/24] userfaultfd: wp: add UFFDIO_COPY_MODE_WP Peter Xu
2019-01-21  7:57 ` [PATCH RFC 13/24] mm: merge parameters for change_protection() Peter Xu
2019-01-21 13:54   ` Jerome Glisse
2019-01-24  5:22     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 14/24] userfaultfd: wp: apply _PAGE_UFFD_WP bit Peter Xu
2019-01-21  7:57 ` [PATCH RFC 15/24] mm: export wp_page_copy() Peter Xu
2019-01-21  7:57 ` [PATCH RFC 16/24] userfaultfd: wp: handle COW properly for uffd-wp Peter Xu
2019-01-21  7:57 ` [PATCH RFC 17/24] userfaultfd: wp: drop _PAGE_UFFD_WP properly when fork Peter Xu
2019-01-21  7:57 ` [PATCH RFC 18/24] userfaultfd: wp: add pmd_swp_*uffd_wp() helpers Peter Xu
2019-01-21  7:57 ` Peter Xu [this message]
2019-01-21  7:57 ` [PATCH RFC 20/24] userfaultfd: wp: don't wake up when doing write protect Peter Xu
2019-01-21 11:10   ` Mike Rapoport
2019-01-24  5:36     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 21/24] khugepaged: skip collapse if uffd-wp detected Peter Xu
2019-01-21  7:57 ` [PATCH RFC 22/24] userfaultfd: wp: UFFDIO_REGISTER_MODE_WP documentation update Peter Xu
2019-01-21  7:57 ` [PATCH RFC 23/24] userfaultfd: selftests: refactor statistics Peter Xu
2019-01-21  7:57 ` [PATCH RFC 24/24] userfaultfd: selftests: add write-protect test Peter Xu
2019-01-21 14:33 ` [PATCH RFC 00/24] userfaultfd: write protection support David Hildenbrand
2019-01-22  3:18   ` Peter Xu
2019-01-22  8:59     ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190121075722.7945-20-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=cracauer@cons.org \
    --cc=dgilbert@redhat.com \
    --cc=dplotnikov@virtuozzo.com \
    --cc=gokhale2@llnl.gov \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jglisse@redhat.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mcfadden8@llnl.gov \
    --cc=mgorman@suse.de \
    --cc=mike.kravetz@oracle.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=shli@fb.com \
    --cc=xemul@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.